Index: polly/trunk/include/polly/Support/ISLTools.h =================================================================== --- polly/trunk/include/polly/Support/ISLTools.h +++ polly/trunk/include/polly/Support/ISLTools.h @@ -398,6 +398,18 @@ /// /// @return { [DomainDomain[] -> NewDomainRange[]] -> Range[] } isl::union_map applyDomainRange(isl::union_map UMap, isl::union_map Func); + +/// Intersect the range of @p Map with @p Range. +/// +/// Since @p Map is an isl::map, the result will be a single space, even though +/// @p Range is an isl::union_set. This is the only difference to +/// isl::map::intersect_range and isl::union_map::interset_range. +/// +/// @param Map { Domain[] -> Range[] } +/// @param Range { Range[] } +/// +/// @return { Domain[] -> Range[] } +isl::map intersectRange(isl::map Map, isl::union_set Range); } // namespace polly #endif /* POLLY_ISLTOOLS_H */ Index: polly/trunk/include/polly/ZoneAlgo.h =================================================================== --- polly/trunk/include/polly/ZoneAlgo.h +++ polly/trunk/include/polly/ZoneAlgo.h @@ -104,6 +104,10 @@ /// unique ids that do not depend on pointer values. llvm::DenseMap ValueIds; + /// Set of array elements that can be reliably used for zone analysis. + /// { Element[] } + isl::union_set CompatibleElts; + /// Prepare the object before computing the zones of @p S. /// /// @param PassName Name of the pass using this analysis. @@ -112,7 +116,7 @@ ZoneAlgorithm(const char *PassName, Scop *S, llvm::LoopInfo *LI); private: - /// Check whether @p Stmt can be accurately analyzed by zones. + /// Find the array elements that violate the zone analysis assumptions. /// /// What violates our assumptions: /// - A load after a write of the same location; we assume that all reads @@ -123,7 +127,13 @@ /// Scalar reads implicitly always occur before other accesses therefore never /// violate the first condition. There is also at most one write to a scalar, /// satisfying the second condition. - bool isCompatibleStmt(ScopStmt *Stmt); + /// + /// @param Stmt The statement to be analyzed. + /// @param[out] IncompatibleElts Receives the elements that are not + /// zone-analysis compatible. + /// @param[out] AllElts receives all encountered elements. + void collectIncompatibleElts(ScopStmt *Stmt, isl::union_set &IncompatibleElts, + isl::union_set &AllElts); void addArrayReadAccess(MemoryAccess *MA); @@ -134,8 +144,8 @@ isl::union_map makeEmptyUnionMap() const; - /// Check whether @p S can be accurately analyzed by zones. - bool isCompatibleScop(); + /// Find the array elements that can be used for zone analysis. + void collectCompatibleElts(); /// Get the schedule for @p Stmt. /// @@ -227,6 +237,10 @@ isl::map makeValInst(llvm::Value *Val, ScopStmt *UserStmt, llvm::Loop *Scope, bool IsCertain = true); + /// Return whether @p MA can be used for transformations (e.g. OpTree load + /// forwarding, DeLICM mapping). + bool isCompatibleAccess(MemoryAccess *MA); + /// Compute the different zones. void computeCommon(); Index: polly/trunk/lib/Support/ISLTools.cpp =================================================================== --- polly/trunk/lib/Support/ISLTools.cpp +++ polly/trunk/lib/Support/ISLTools.cpp @@ -533,3 +533,8 @@ return std::move(UMap).apply_domain(std::move(LifetedFunc)); } + +isl::map polly::intersectRange(isl::map Map, isl::union_set Range) { + isl::set RangeSet = Range.extract_set(Map.get_space().range()); + return Map.intersect_range(RangeSet); +} \ No newline at end of file Index: polly/trunk/lib/Transform/DeLICM.cpp =================================================================== --- polly/trunk/lib/Transform/DeLICM.cpp +++ polly/trunk/lib/Transform/DeLICM.cpp @@ -1257,10 +1257,7 @@ /// @return True if the computed lifetimes (#Zone) is usable. bool computeZone() { // Check that nothing strange occurs. - if (!isCompatibleScop()) { - DeLICMIncompatible++; - return false; - } + collectCompatibleElts(); isl::union_set EltUnused; isl::union_map EltKnown, EltWritten; @@ -1345,6 +1342,32 @@ continue; } + if (!isa(MA->getAccessInstruction())) { + DEBUG(dbgs() << "Access " << MA + << " pruned because it is not a StoreInst\n"); + OptimizationRemarkMissed R(DEBUG_TYPE, "NotAStore", + MA->getAccessInstruction()); + R << "skipped possible mapping target because non-store instructions " + "are not supported"; + S->getFunction().getContext().diagnose(R); + continue; + } + + isl::union_set TouchedElts = MA->getLatestAccessRelation().range(); + if (!TouchedElts.is_subset(CompatibleElts)) { + DEBUG( + dbgs() + << "Access " << MA + << " is incompatible because it touches incompatible elements\n"); + OptimizationRemarkMissed R(DEBUG_TYPE, "IncompatibleElts", + MA->getAccessInstruction()); + R << "skipped possible mapping target because a target location " + "cannot be reliably analyzed"; + S->getFunction().getContext().diagnose(R); + continue; + } + + assert(isCompatibleAccess(MA)); NumberOfCompatibleTargets++; DEBUG(dbgs() << "Analyzing target access " << MA << "\n"); if (collapseScalarsToStore(MA)) Index: polly/trunk/lib/Transform/ForwardOpTree.cpp =================================================================== --- polly/trunk/lib/Transform/ForwardOpTree.cpp +++ polly/trunk/lib/Transform/ForwardOpTree.cpp @@ -60,7 +60,6 @@ STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); STATISTIC(KnownOutOfQuota, "Analyses aborted because max_operations was reached"); -STATISTIC(KnownIncompatible, "Number of SCoPs incompatible for analysis"); STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); STATISTIC(TotalKnownLoadsForwarded, @@ -259,10 +258,7 @@ isl::union_map MustKnown, KnownFromLoad, KnownFromInit; // Check that nothing strange occurs. - if (!isCompatibleScop()) { - KnownIncompatible++; - return false; - } + collectCompatibleElts(); isl_ctx_reset_error(IslCtx.get()); { Index: polly/trunk/lib/Transform/ZoneAlgo.cpp =================================================================== --- polly/trunk/lib/Transform/ZoneAlgo.cpp +++ polly/trunk/lib/Transform/ZoneAlgo.cpp @@ -154,9 +154,13 @@ #include "polly/Support/GICHelper.h" #include "polly/Support/ISLTools.h" #include "polly/Support/VirtualInstruction.h" +#include "llvm/ADT/Statistic.h" #define DEBUG_TYPE "polly-zone" +STATISTIC(NumIncompatibleArrays, "Number of not zone-analyzable arrays"); +STATISTIC(NumCompatibleArrays, "Number of zone-analyzable arrays"); + using namespace polly; using namespace llvm; @@ -320,7 +324,9 @@ return true; } -bool ZoneAlgorithm::isCompatibleStmt(ScopStmt *Stmt) { +void ZoneAlgorithm::collectIncompatibleElts(ScopStmt *Stmt, + isl::union_set &IncompatibleElts, + isl::union_set &AllElts) { auto Stores = makeEmptyUnionMap(); auto Loads = makeEmptyUnionMap(); @@ -330,7 +336,13 @@ if (!MA->isLatestArrayKind()) continue; - auto AccRel = give(isl_union_map_from_map(getAccessRelationFor(MA).take())); + isl::map AccRelMap = getAccessRelationFor(MA); + isl::union_map AccRel = AccRelMap; + + // To avoid solving any ILP problems, always add entire arrays instead of + // just the elements that are accessed. + auto ArrayElts = isl::set::universe(AccRelMap.get_space().range()); + AllElts = AllElts.add_set(ArrayElts); if (MA->isRead()) { // Reject load after store to same location. @@ -342,7 +354,8 @@ R << " (previous stores: " << Stores; R << ", loading: " << AccRel << ")"; S->getFunction().getContext().diagnose(R); - return false; + + IncompatibleElts = IncompatibleElts.add_set(ArrayElts); } Loads = give(isl_union_map_union(Loads.take(), AccRel.take())); @@ -357,7 +370,8 @@ R << "encountered write that is not a StoreInst: " << printInstruction(MA->getAccessInstruction()); S->getFunction().getContext().diagnose(R); - return false; + + IncompatibleElts = IncompatibleElts.add_set(ArrayElts); } // In region statements the order is less clear, eg. the load and store @@ -369,7 +383,8 @@ MA->getAccessInstruction()); R << "store is in a non-affine subregion"; S->getFunction().getContext().diagnose(R); - return false; + + IncompatibleElts = IncompatibleElts.add_set(ArrayElts); } // Do not allow more than one store to the same location. @@ -382,13 +397,12 @@ R << " (previous stores: " << Stores; R << ", storing: " << AccRel << ")"; S->getFunction().getContext().diagnose(R); - return false; + + IncompatibleElts = IncompatibleElts.add_set(ArrayElts); } Stores = give(isl_union_map_union(Stores.take(), AccRel.take())); } - - return true; } void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) { @@ -397,7 +411,7 @@ ScopStmt *Stmt = MA->getStatement(); // { DomainRead[] -> Element[] } - auto AccRel = getAccessRelationFor(MA); + auto AccRel = intersectRange(getAccessRelationFor(MA), CompatibleElts); AllReads = give(isl_union_map_add_map(AllReads.take(), AccRel.copy())); if (LoadInst *Load = dyn_cast_or_null(MA->getAccessInstruction())) { @@ -424,7 +438,7 @@ auto *Stmt = MA->getStatement(); // { Domain[] -> Element[] } - auto AccRel = getAccessRelationFor(MA); + auto AccRel = intersectRange(getAccessRelationFor(MA), CompatibleElts); if (MA->isMustWrite()) AllMustWrites = @@ -459,12 +473,21 @@ return give(isl_union_map_empty(ParamSpace.copy())); } -bool ZoneAlgorithm::isCompatibleScop() { - for (auto &Stmt : *S) { - if (!isCompatibleStmt(&Stmt)) - return false; - } - return true; +void ZoneAlgorithm::collectCompatibleElts() { + // First find all the incompatible elements, then take the complement. + // We compile the list of compatible (rather than incompatible) elements so + // users can intersect with the list, not requiring a subtract operation. It + // also allows us to define a 'universe' of all elements and makes it more + // explicit in which array elements can be used. + isl::union_set AllElts = makeEmptyUnionSet(); + isl::union_set IncompatibleElts = makeEmptyUnionSet(); + + for (auto &Stmt : *S) + collectIncompatibleElts(&Stmt, IncompatibleElts, AllElts); + + NumIncompatibleArrays += isl_union_set_n_set(IncompatibleElts.keep()); + CompatibleElts = AllElts.subtract(IncompatibleElts); + NumCompatibleArrays += isl_union_set_n_set(CompatibleElts.keep()); } isl::map ZoneAlgorithm::getScatterFor(ScopStmt *Stmt) const { @@ -655,6 +678,15 @@ llvm_unreachable("Unhandled use type"); } +bool ZoneAlgorithm::isCompatibleAccess(MemoryAccess *MA) { + if (!MA) + return false; + if (!MA->isLatestArrayKind()) + return false; + Instruction *AccInst = MA->getAccessInstruction(); + return isa(AccInst) || isa(AccInst); +} + void ZoneAlgorithm::computeCommon() { AllReads = makeEmptyUnionMap(); AllMayWrites = makeEmptyUnionMap(); @@ -666,6 +698,8 @@ for (auto *MA : Stmt) { if (!MA->isLatestArrayKind()) continue; + if (!isCompatibleAccess(MA)) + continue; if (MA->isRead()) addArrayReadAccess(MA); Index: polly/trunk/test/DeLICM/reduction_unrelatedunusual.ll =================================================================== --- polly/trunk/test/DeLICM/reduction_unrelatedunusual.ll +++ polly/trunk/test/DeLICM/reduction_unrelatedunusual.ll @@ -0,0 +1,103 @@ +; RUN: opt %loadPolly -polly-delicm-partial-writes=true -polly-delicm -analyze < %s | FileCheck -match-full-lines %s +; +; Map %add and %phi to A[j]. +; The non-analyzable store to C[0] is unrelated and can be ignored. +; +; void func(double *A) { +; for (int j = 0; j < 2; j += 1) { /* outer */ +; double phi = 0.0; +; for (int i = 0; i < 4; i += 1) { /* reduction */ +; phi += 4.2; +; C[0] = 21.0; +; C[0] = 42.0; +; } +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A, double* noalias nonnull %C) { +entry: + br label %outer.for + +outer.for: + %j = phi i32 [0, %entry], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 2 + br i1 %j.cmp, label %reduction.for, label %outer.exit + + + reduction.for: + %i = phi i32 [0, %outer.for], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %outer.for], [%add, %reduction.inc] + %i.cmp = icmp slt i32 %i, 4 + br i1 %i.cmp, label %body, label %reduction.exit + + + + body: + %add = fadd double %phi, 4.2 + store double 21.0, double* %C + store double 41.0, double* %C + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + br label %reduction.for + + reduction.exit: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %phi, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_outer_for +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_outer_for[i0] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_outer_for[i0] -> MemRef_A[i0] : i0 <= 1 }; +; CHECK-NEXT: Stmt_reduction_for +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_C[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body[i0, i1] -> MemRef_C[0] }; +; CHECK-NEXT: Stmt_reduction_inc +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[i0] }; +; CHECK-NEXT: Stmt_reduction_exit +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_reduction_exit[i0] -> MemRef_phi[] }; +; CHECK-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: } Index: polly/trunk/test/ForwardOpTree/forward_load_unrelatedunusual.ll =================================================================== --- polly/trunk/test/ForwardOpTree/forward_load_unrelatedunusual.ll +++ polly/trunk/test/ForwardOpTree/forward_load_unrelatedunusual.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines +; +; Rematerialize a load. +; The non-analyzable store to C[0] is unrelated and can be ignored. +; +; for (int j = 0; j < n; j += 1) { +; bodyA: +; double val = B[j]; +; C[0] = 21.0; +; C[0] = 42.0; +; +; bodyB: +; A[j] = val; +; } +; +define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B, double *noalias %C) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %bodyA, label %exit + + bodyA: + %B_idx = getelementptr inbounds double, double* %B, i32 %j + %val = load double, double* %B_idx + store double 21.0, double* %C + store double 41.0, double* %C + br label %bodyB + + bodyB: + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Known loads forwarded: 1 +; CHECK: Operand trees forwarded: 1 +; CHECK: Statements with forwarded operand trees: 1 +; CHECK: } + +; CHECK: Stmt_bodyB +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: null; +; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %val = load double, double* %B_idx +; CHECK-NEXT: store double %val, double* %A_idx +; CHECK-NEXT: }