Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -680,8 +680,14 @@ /// @brief Type for invariant memory accesses and their domain context. using InvariantAccessTy = std::pair; +/// @brief Type for an ordered list of invariant accesses. +using InvariantAccessListTy = std::forward_list; + +/// @brief Type for a class of equivalent invariant memory accesses. +using InvariantEquivClassTy = std::pair; + /// @brief Type for multiple invariant memory accesses and their domain context. -using InvariantAccessesTy = SmallVector; +using InvariantAccessesTy = SmallVector; ///===----------------------------------------------------------------------===// /// @brief Statement of the Scop @@ -906,12 +912,12 @@ /// @brief Add @p Access to this statement's list of accesses. void addAccess(MemoryAccess *Access); - /// @brief Move the memory access in @p InvMAs to @p TargetList. + /// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses. /// /// Note that scalar accesses that are caused by any access in @p InvMAs will /// be eliminated too. void hoistMemoryAccesses(MemoryAccessList &InvMAs, - InvariantAccessesTy &TargetList); + InvariantAccessesTy &InvariantEquivClasses); typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; @@ -1135,7 +1141,7 @@ MinMaxVectorPairVectorTy MinMaxAliasGroups; /// @brief List of invariant accesses. - InvariantAccessesTy InvariantAccesses; + InvariantAccessesTy InvariantEquivClasses; /// @brief Scop constructor; invoked from ScopInfo::buildScop. Scop(Region &R, AccFuncMapType &AccFuncMap, ScopDetection &SD, @@ -1186,6 +1192,20 @@ /// @see isIgnored() void simplifySCoP(bool RemoveIgnoredStmts); + /// @brief Create equivalence classes for required invariant accesses. + /// + /// These classes will consolidate multiple required invariant loads from the + /// same address in order to keep the number of dimensions in the SCoP + /// description small. For each such class equivalence class only one + /// representing element, hence one required invariant load, will be chosen + /// and modeled as parameter. The method + /// Scop::getRepresentingInvariantLoadSCEV() will replace each element from an + /// equivalence class with the representing element that is modeled. As a + /// consequence Scop::getIdForParam() will only return an id for the + /// representing element of each equivalence class, thus for each required + /// invariant location. + void buildInvariantEquivalenceClasses(); + /// @brief Hoist invariant memory loads and check for required ones. /// /// We first identify "common" invariant loads, thus loads that are invariant @@ -1220,6 +1240,19 @@ /// @brief Simplify the assumed and boundary context. void simplifyContexts(); + /// @brief Get the representing SCEV for @p S if applicable, otherwise @p S. + /// + /// Invariant loads of the same location are put in an equivalence class and + /// only one of them is chosen as a representing element that will be + /// modeled as a parameter. The others have to be normalized, i.e., + /// replaced by the representing element of their equivalence class, in order + /// to get the correct parameter value, e.g., in the SCEVAffinator. + /// + /// @param S The SCEV to normalize. + /// + /// @return The representing SCEV for invariant loads or @p S if none. + const SCEV *getRepresentingInvariantLoadSCEV(const SCEV *S) const; + /// @brief Create a new SCoP statement for either @p BB or @p R. /// /// Either @p BB or @p R should be non-null. A new statement for the non-null @@ -1340,7 +1373,7 @@ /// @brief Return the set of invariant accesses. const InvariantAccessesTy &getInvariantAccesses() const { - return InvariantAccesses; + return InvariantEquivClasses; } /// @brief Mark the SCoP as optimized by the scheduler. Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -1356,7 +1356,7 @@ void ScopStmt::dump() const { print(dbgs()); } void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs, - InvariantAccessesTy &TargetList) { + InvariantAccessesTy &InvariantEquivClasses) { // Remove all memory accesses in @p InvMAs from this statement together // with all scalar accesses that were caused by them. The tricky iteration @@ -1409,8 +1409,49 @@ } } - for (MemoryAccess *MA : InvMAs) - TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx))); + for (MemoryAccess *MA : InvMAs) { + + // Check for another invariant access that accesses the same location as + // MA and if found consolidate them. Otherwise create a new equivalence + // class at the end of InvariantEquivClasses. + LoadInst *LInst = cast(MA->getAccessInstruction()); + const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand()); + bool Consolidated = false; + + for (auto &IAClass : InvariantEquivClasses) { + const SCEV *ClassPointerSCEV = IAClass.first; + if (PointerSCEV != ClassPointerSCEV) + continue; + + Consolidated = true; + + // We created empty equivalence classes for required invariant loads + // in the beginning and might encounter one of them here. If so, this + // MA will be the first in that equivalence class. + auto &ClassList = IAClass.second; + if (ClassList.empty()) { + ClassList.push_front(std::make_pair(MA, isl_set_copy(DomainCtx))); + break; + } + + // If the equivalence class for MA is not empty we unify the execution + // context and add MA to the list of accesses that are in this class. + isl_set *IAClassDomainCtx = IAClass.second.front().second; + IAClassDomainCtx = + isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx)); + ClassList.push_front(std::make_pair(MA, IAClassDomainCtx)); + break; + } + + if (Consolidated) + continue; + + // If we did not consolidate MA, thus did not find an equivalence class + // that for it, we create a new one. + InvariantAccessTy IA = std::make_pair(MA, isl_set_copy(DomainCtx)); + InvariantEquivClasses.emplace_back(InvariantEquivClassTy( + std::make_pair(PointerSCEV, InvariantAccessListTy({IA})))); + } isl_set_free(DomainCtx); } @@ -1424,9 +1465,34 @@ Context = NewContext; } +const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *S) const { + const SCEVUnknown *SU = dyn_cast_or_null(S); + if (!SU) + return S; + + LoadInst *LInst = dyn_cast(SU->getValue()); + if (!LInst) + return S; + + // Try to find an equivalence class for the load, if found return + // the SCEV for the representing element, otherwise return S. + const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); + for (const InvariantEquivClassTy &IAClass : InvariantEquivClasses) { + const SCEV *ClassPointerSCEV = IAClass.first; + if (ClassPointerSCEV == PointerSCEV) + return ClassPointerSCEV; + } + + return S; +} + void Scop::addParams(std::vector NewParameters) { for (const SCEV *Parameter : NewParameters) { Parameter = extractConstantFactor(Parameter, *SE).second; + + // Normalize the SCEV to get the representing element for an invariant load. + Parameter = getRepresentingInvariantLoadSCEV(Parameter); + if (ParameterIds.find(Parameter) != ParameterIds.end()) continue; @@ -1438,6 +1504,9 @@ } __isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const { + // Normalize the SCEV to get the representing element for an invariant load. + Parameter = getRepresentingInvariantLoadSCEV(Parameter); + ParamIdType::const_iterator IdIter = ParameterIds.find(Parameter); if (IdIter == ParameterIds.end()) @@ -1515,6 +1584,21 @@ isl_space_free(Space); } +void Scop::buildInvariantEquivalenceClasses() { + const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion()); + SmallPtrSet ClassPointerSet; + for (LoadInst *LInst : RIL) { + const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand()); + + // Skip the load if we already have a equivalence class for the pointer. + if (!ClassPointerSet.insert(PointerSCEV).second) + continue; + + InvariantEquivClasses.emplace_back(InvariantEquivClassTy( + std::make_pair(PointerSCEV, InvariantAccessListTy()))); + } +} + void Scop::buildContext() { isl_space *Space = isl_space_params_alloc(IslCtx, 0); Context = isl_set_universe(isl_space_copy(Space)); @@ -2337,6 +2421,8 @@ void Scop::init(AliasAnalysis &AA) { buildContext(); + buildInvariantEquivalenceClasses(); + buildDomains(&R); // Remove empty and ignored statements. @@ -2388,8 +2474,9 @@ } } - for (const auto &IA : InvariantAccesses) - isl_set_free(IA.second); + for (const auto &IAClass : InvariantEquivClasses) + if (!IAClass.second.empty()) + isl_set_free(IAClass.second.front().second); } void Scop::updateAccessDimensionality() { @@ -2478,17 +2565,18 @@ InvMAs.reverse(); // Transfer the memory access from the statement to the SCoP. - Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses); + Stmt.hoistMemoryAccesses(InvMAs, InvariantEquivClasses); isl_set_free(Domain); } isl_union_map_free(Writes); - if (!InvariantAccesses.empty()) + if (!InvariantEquivClasses.empty()) IsOptimized = true; + auto &ScopRIL = *SD.getRequiredInvariantLoads(&getRegion()); // Check required invariant loads that were tagged during SCoP detection. - for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) { + for (LoadInst *LI : ScopRIL) { assert(LI && getRegion().contains(LI)); ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent()); if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) { @@ -2511,8 +2599,12 @@ // we already ordered the accesses such that indirect loads can be resolved, // thus we use a stable sort here. - auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0, - const InvariantAccessTy &IA1) { + auto compareInvariantAccesses = [this]( + const InvariantEquivClassTy &IAClass0, + const InvariantEquivClassTy &IAClass1) { + const InvariantAccessTy &IA0 = IAClass0.second.front(); + const InvariantAccessTy &IA1 = IAClass1.second.front(); + Instruction *AI0 = IA0.first->getAccessInstruction(); Instruction *AI1 = IA1.first->getAccessInstruction(); @@ -2554,7 +2646,7 @@ return Involves1Id0; }; - std::stable_sort(InvariantAccesses.begin(), InvariantAccesses.end(), + std::stable_sort(InvariantEquivClasses.begin(), InvariantEquivClasses.end(), compareInvariantAccesses); } @@ -2739,9 +2831,14 @@ OS.indent(4) << "Region: " << getNameStr() << "\n"; OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n"; OS.indent(4) << "Invariant Accesses: {\n"; - for (const auto &IA : InvariantAccesses) { - IA.first->print(OS); - OS.indent(12) << "Execution Context: " << IA.second << "\n"; + for (const auto &IAClass : InvariantEquivClasses) { + if (IAClass.second.empty()) { + OS.indent(12) << "Class Pointer: " << IAClass.first << "\n"; + } else { + IAClass.second.front().first->print(OS); + OS.indent(12) << "Execution Context: " << IAClass.second.front().second + << "\n"; + } } OS.indent(4) << "}\n"; printContext(OS.indent(4)); Index: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp +++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp @@ -906,8 +906,8 @@ void IslNodeBuilder::preloadInvariantLoads() { - const auto &InvAccList = S.getInvariantAccesses(); - if (InvAccList.empty()) + const auto &InvariantEquivClasses = S.getInvariantAccesses(); + if (InvariantEquivClasses.empty()) return; const Region &R = S.getRegion(); @@ -921,14 +921,20 @@ isl_ast_build *Build = isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); - for (const auto &IA : InvAccList) { - MemoryAccess *MA = IA.first; + // For each equivalence class of invariant loads we pre-load the representing + // element with the unified execution context. However, we have to map all + // elements of the class to the one preloaded load as they are referenced + // during the code generation and therefor need to be mapped. + for (const auto &IAClass : InvariantEquivClasses) { + + MemoryAccess *MA = IAClass.second.front().first; assert(!MA->isImplicit()); - isl_set *Domain = isl_set_copy(IA.second); + isl_set *Domain = isl_set_copy(IAClass.second.front().second); Instruction *AccInst = MA->getAccessInstruction(); Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build); - ValueMap[AccInst] = PreloadVal; + for (const InvariantAccessTy &IA : IAClass.second) + ValueMap[IA.first->getAccessInstruction()] = PreloadVal; if (SE.isSCEVable(AccInst->getType())) { isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); Index: polly/trunk/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll +++ polly/trunk/test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-codegen -polly-parallel \ +; RUN: -polly-parallel-force -S < %s | FileCheck %s +; +; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction. +; +; void f(float *A) { +; for (int i = 1; i < 1000; i++) +; A[i] += A[0] + A[0]; +; } +; +; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds +; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* nocapture %A) { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] + %tmp = load float, float* %A, align 4 + %tmp2 = load float, float* %A, align 4 + %tmpadd = fadd float %tmp, %tmp2 + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv + %tmp1 = load float, float* %arrayidx1, align 4 + %add = fadd float %tmp2, %tmp1 + store float %add, float* %arrayidx1, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1000 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} Index: polly/trunk/test/Isl/CodeGen/invariant_load_outermost.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/invariant_load_outermost.ll +++ polly/trunk/test/Isl/CodeGen/invariant_load_outermost.ll @@ -0,0 +1,37 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s + +; CHECK: polly.start + +; void f(int *A) { +; if (*A > 42) +; *A = *A + 1; +; else +; *A = *A - 1; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %entry.split + +entry.split: + %tmp = load i32, i32* %A, align 4 + %cmp = icmp sgt i32 %tmp, 42 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %tmp1 = load i32, i32* %A, align 4 + %add = add nsw i32 %tmp1, 1 + br label %if.end + +if.else: ; preds = %entry + %tmp2 = load i32, i32* %A, align 4 + %sub = add nsw i32 %tmp2, -1 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ] + store i32 %storemerge, i32* %A, align 4 + ret void +} Index: polly/trunk/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll +++ polly/trunk/test/Isl/CodeGen/whole-scop-non-affine-subregion.ll @@ -2,9 +2,9 @@ ; RUN: -polly-codegen -S < %s | FileCheck %s ; CHECK: polly.start - +; int /* pure */ g() ; void f(int *A) { -; if (*A > 42) +; if (g()) ; *A = *A + 1; ; else ; *A = *A - 1; @@ -17,22 +17,26 @@ br label %entry.split entry.split: - %tmp = load i32, i32* %A, align 4 - %cmp = icmp sgt i32 %tmp, 42 + %call = call i32 @g() + %cmp = icmp eq i32 %call, 0 br i1 %cmp, label %if.then, label %if.else if.then: ; preds = %entry %tmp1 = load i32, i32* %A, align 4 %add = add nsw i32 %tmp1, 1 + store i32 %add, i32* %A, align 4 br label %if.end if.else: ; preds = %entry %tmp2 = load i32, i32* %A, align 4 %sub = add nsw i32 %tmp2, -1 + store i32 %sub, i32* %A, align 4 br label %if.end if.end: ; preds = %if.else, %if.then - %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ] - store i32 %storemerge, i32* %A, align 4 ret void } + +declare i32 @g() #0 + +attributes #0 = { nounwind readnone } Index: polly/trunk/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll =================================================================== --- polly/trunk/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ polly/trunk/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -17,8 +17,8 @@ ; CHECK: Invariant Accesses: { ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK: MemRef_init_ptr[0] -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: MemRef_init_ptr[0] +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NOT: MemRef_init_ptr[0] ; CHECK: } define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: Index: polly/trunk/test/ScopInfo/invariant_loads_complicated_dependences.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_loads_complicated_dependences.ll +++ polly/trunk/test/ScopInfo/invariant_loads_complicated_dependences.ll @@ -2,17 +2,17 @@ ; ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_for_body[i0] -> MemRef_LB[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : } +; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LB[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : } +; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : (tmp5 >= 1 + tmp and tmp5 >= 6) or tmp >= 6 } +; CHECK-NEXT: [LB, UB] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : (UB >= 1 + LB and UB >= 6) or LB >= 6 } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] }; -; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : tmp <= 5 } +; CHECK-NEXT: [LB, UB] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] }; +; CHECK-NEXT: Execution Context: [LB, UB] -> { : LB <= 5 } ; CHECK-NEXT: } ; ; void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB, Index: polly/trunk/test/ScopInfo/invariant_loop_bounds.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_loop_bounds.ll +++ polly/trunk/test/ScopInfo/invariant_loop_bounds.ll @@ -3,28 +3,28 @@ ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[2] -; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : } +; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[1] -; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 } +; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_0 >= 1 } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[0] -; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 } +; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_1 >= 1 and p_0 >= 1 } ; CHECK-NEXT: } ; -; CHECK: p0: %tmp -; CHECK: p1: %tmp8 -; CHECK: p2: %tmp10 +; CHECK: p0: (8 + @bounds) +; CHECK: p1: (4 + @bounds) +; CHECK: p2: @bounds ; CHECK: Statements { ; CHECK: Stmt_for_body_6 ; CHECK: Domain := -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + p_0 and i1 >= 0 and i1 <= -1 + p_1 and i2 >= 0 and i2 <= -1 + bounds }; ; CHECK: Schedule := -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK: [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; ; CHECK: } ; ; int bounds[3]; Index: polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll +++ polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll @@ -0,0 +1,106 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; Verify that we only have one parameter and one invariant load for all +; three loads that occure in the region but actually access the same +; location. Also check that the execution context is the most generic +; one, e.g., here the universal set. +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_bounds[0] +; CHECK-NEXT: Execution Context: [bounds] -> { : } +; CHECK-NEXT: } +; +; CHECK: p0: @bounds +; CHECK-NOT: p1 +; CHECK: Statements { +; CHECK: Stmt_for_body_6 +; CHECK: Domain := +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds }; +; CHECK: Schedule := +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: } +; +; int bounds[1]; +; double data[1024][1024][1024]; +; +; void foo() { +; int i, j, k; +; for (k = 0; k < bounds[0]; k++) +; for (j = 0; j < bounds[0]; j++) +; for (i = 0; i < bounds[0]; i++) +; data[k][j][i] += i + j + k; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@bounds = common global [1 x i32] zeroinitializer, align 4 +@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16 + +define void @foo() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.16, %entry + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ] + %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp7 = sext i32 %tmp to i64 + %cmp = icmp slt i64 %indvars.iv5, %tmp7 + br i1 %cmp, label %for.body, label %for.end.18 + +for.body: ; preds = %for.cond + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc.13, %for.body + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ] + %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp9 = sext i32 %tmp8 to i64 + %cmp2 = icmp slt i64 %indvars.iv3, %tmp9 + br i1 %cmp2, label %for.body.3, label %for.end.15 + +for.body.3: ; preds = %for.cond.1 + br label %for.cond.4 + +for.cond.4: ; preds = %for.inc, %for.body.3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ] + %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp11 = sext i32 %tmp10 to i64 + %cmp5 = icmp slt i64 %indvars.iv, %tmp11 + br i1 %cmp5, label %for.body.6, label %for.end + +for.body.6: ; preds = %for.cond.4 + %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3 + %tmp13 = add nsw i64 %tmp12, %indvars.iv5 + %tmp14 = trunc i64 %tmp13 to i32 + %conv = sitofp i32 %tmp14 to double + %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv + %tmp15 = load double, double* %arrayidx11, align 8 + %add12 = fadd double %tmp15, %conv + store double %add12, double* %arrayidx11, align 8 + br label %for.inc + +for.inc: ; preds = %for.body.6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.4 + +for.end: ; preds = %for.cond.4 + br label %for.inc.13 + +for.inc.13: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond.1 + +for.end.15: ; preds = %for.cond.1 + br label %for.inc.16 + +for.inc.16: ; preds = %for.end.15 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + br label %for.cond + +for.end.18: ; preds = %for.cond + ret void +} Index: polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll +++ polly/trunk/test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll @@ -0,0 +1,109 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; Verify that we only have one parameter and one invariant load for all +; three loads that occure in the region but actually access the same +; location. Also check that the execution context is the most generic +; one, e.g., here the universal set. +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_bounds[0] +; CHECK-NEXT: Execution Context: [bounds, p] -> { : } +; CHECK-NEXT: } +; +; CHECK: p0: @bounds +; CHECK: p1: %p +; CHECK-NOT: p2: +; CHECK: Statements { +; CHECK: Stmt_for_body_6 +; CHECK: Domain := +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds }; +; CHECK: Schedule := +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] }; +; CHECK: } +; +; int bounds[1]; +; double data[1024][1024][1024]; +; +; void foo(int p) { +; int i, j, k; +; for (k = 0; k < bounds[0]; k++) +; if (p == 0) +; for (j = 0; j < bounds[0]; j++) +; for (i = 0; i < bounds[0]; i++) +; data[k][j][i] += i + j + k; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@bounds = common global [1 x i32] zeroinitializer, align 4 +@data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16 + +define void @foo(i32 %p) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.16, %entry + %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ] + %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp7 = sext i32 %tmp to i64 + %cmp = icmp slt i64 %indvars.iv5, %tmp7 + br i1 %cmp, label %for.body, label %for.end.18 + +for.body: ; preds = %for.cond + %cmpp = icmp eq i32 %p, 0 + br i1 %cmpp, label %for.cond.1, label %for.inc.16 + +for.cond.1: ; preds = %for.inc.13, %for.body + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ] + %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp9 = sext i32 %tmp8 to i64 + %cmp2 = icmp slt i64 %indvars.iv3, %tmp9 + br i1 %cmp2, label %for.body.3, label %for.end.15 + +for.body.3: ; preds = %for.cond.1 + br label %for.cond.4 + +for.cond.4: ; preds = %for.inc, %for.body.3 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ] + %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4 + %tmp11 = sext i32 %tmp10 to i64 + %cmp5 = icmp slt i64 %indvars.iv, %tmp11 + br i1 %cmp5, label %for.body.6, label %for.end + +for.body.6: ; preds = %for.cond.4 + %tmp12 = add nsw i64 %indvars.iv, %indvars.iv3 + %tmp13 = add nsw i64 %tmp12, %indvars.iv5 + %tmp14 = trunc i64 %tmp13 to i32 + %conv = sitofp i32 %tmp14 to double + %arrayidx11 = getelementptr inbounds [1024 x [1024 x [1024 x double]]], [1024 x [1024 x [1024 x double]]]* @data, i64 0, i64 %indvars.iv5, i64 %indvars.iv3, i64 %indvars.iv + %tmp15 = load double, double* %arrayidx11, align 8 + %add12 = fadd double %tmp15, %conv + store double %add12, double* %arrayidx11, align 8 + br label %for.inc + +for.inc: ; preds = %for.body.6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.4 + +for.end: ; preds = %for.cond.4 + br label %for.inc.13 + +for.inc.13: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond.1 + +for.end.15: ; preds = %for.cond.1 + br label %for.inc.16 + +for.inc.16: ; preds = %for.end.15 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + br label %for.cond + +for.end.18: ; preds = %for.cond + ret void +} Index: polly/trunk/test/ScopInfo/required-invariant-loop-bounds.ll =================================================================== --- polly/trunk/test/ScopInfo/required-invariant-loop-bounds.ll +++ polly/trunk/test/ScopInfo/required-invariant-loop-bounds.ll @@ -3,10 +3,10 @@ ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[0] -; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : } +; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : } ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: MemRef_bounds[1] -; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : tmp >= 0 } +; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : bounds >= 0 } ; CHECK: } ; double A[1000][1000];