Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -699,8 +699,14 @@
 /// @brief Type for invariant memory accesses and their domain context.
 using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>;
 
+/// @brief Type for an ordered list of invariant accesses.
+using InvariantAccessListTy = std::forward_list<InvariantAccessTy>;
+
+/// @brief Type for a class of equivalent invariant memory accesses.
+using InvariantAccessClassTy = std::pair<const SCEV *, InvariantAccessListTy>;
+
 /// @brief Type for multiple invariant memory accesses and their domain context.
-using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>;
+using InvariantAccessesTy = SmallVector<InvariantAccessClassTy, 8>;
 
 ///===----------------------------------------------------------------------===//
 /// @brief Statement of the Scop
@@ -925,12 +931,12 @@
     BB = Block;
   }
 
-  /// @brief Move the memory access in @p InvMAs to @p TargetList.
+  /// @brief Move the memory accesses in @p InvMAs to @p InvariantAccesses.
   ///
   /// Note that scalar accesses that are caused by any access in @p InvMAs will
   /// be eliminated too.
   void hoistMemoryAccesses(MemoryAccessList &InvMAs,
-                           InvariantAccessesTy &TargetList);
+                           InvariantAccessesTy &InvariantAccesses);
 
   typedef MemoryAccessVec::iterator iterator;
   typedef MemoryAccessVec::const_iterator const_iterator;
@@ -1049,6 +1055,10 @@
   /// The statements in this Scop.
   StmtSet Stmts;
 
+  /// @brief Type for equivalence classes of invariant loads.
+  using InvariantLoadsClassesTy =
+      llvm::DenseMap<const SCEV *, InvariantLoadsSetTy>;
+
   /// Parameters of this Scop
   typedef SmallVector<const SCEV *, 8> ParamVecType;
   ParamVecType Parameters;
@@ -1215,6 +1225,9 @@
   ///   - removal of empty statements (due to invariant load hoisting)
   void simplifySCoP();
 
+  /// @brief Create invariant access classes for required invariant accesses.
+  void buildInvariantAccessClasses(ScopDetection &SD);
+
   /// @brief Hoist invariant memory loads and check for required ones.
   void hoistInvariantLoads();
 
@@ -1233,6 +1246,13 @@
   /// @brief Simplify the assumed and boundary context.
   void simplifyContexts();
 
+  /// @brief Get the representing SCEV for invariant loads if applicable.
+  ///
+  /// @param S The SCEV to normalize.
+  ///
+  /// @return The representing SCEV for invariant loads or @p S if none.
+  const SCEV *normalizeInvariantLoadSCEV(const SCEV *S) const;
+
   /// @brief Create a new SCoP statement for either @p BB or @p R.
   ///
   /// Either @p BB or @p R should be non-null. A new statement for the non-null
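The header now groups hoisted loads into equivalence classes keyed by the SCEV
of the accessed pointer, instead of keeping one flat (access, execution
context) list. A minimal stand-alone model of the container shape, using plain
C++ stand-ins for MemoryAccess, isl_set, and the SCEV key (not Polly's real
types):

    #include <forward_list>
    #include <utility>
    #include <vector>

    struct Access {};          // stand-in for polly::MemoryAccess
    struct Set {};             // stand-in for isl_set (an execution context)
    using Key = const void *;  // stand-in for the pointer's SCEV

    // One (access, context) pair per hoisted load.
    using InvariantAccess = std::pair<Access *, Set *>;
    // All accesses to one location; the front element carries the class context.
    using InvariantAccessList = std::forward_list<InvariantAccess>;
    // An equivalence class: the location key plus its member accesses.
    using InvariantAccessClass = std::pair<Key, InvariantAccessList>;
    // The SCoP-wide collection: one class per distinct location.
    using InvariantAccessClasses = std::vector<InvariantAccessClass>;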
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -1364,7 +1364,7 @@
 void ScopStmt::dump() const { print(dbgs()); }
 
 void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
-                                   InvariantAccessesTy &TargetList) {
+                                   InvariantAccessesTy &InvariantAccesses) {
 
   // Remove all memory accesses in @p InvMAs from this statement together
   // with all scalar accesses that were caused by them. The tricky iteration
@@ -1415,8 +1415,42 @@
     }
   }
 
-  for (MemoryAccess *MA : InvMAs)
-    TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx)));
+  for (MemoryAccess *MA : InvMAs) {
+
+    // Check for another invariant access that accesses the same location as
+    // MA and, if found, consolidate them. Otherwise, create a new equivalence
+    // class at the end of InvariantAccesses.
+    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
+    const SCEV *PointerSCEV = SE.getSCEV(LInst->getPointerOperand());
+    bool Consolidated = false;
+
+    for (auto &IAClass : InvariantAccesses) {
+      const SCEV *ClassPointerSCEV = IAClass.first;
+      if (PointerSCEV != ClassPointerSCEV)
+        continue;
+
+      Consolidated = true;
+
+      auto &ClassList = IAClass.second;
+      if (ClassList.empty()) {
+        ClassList.push_front(std::make_pair(MA, isl_set_copy(DomainCtx)));
+        break;
+      }
+
+      isl_set *IAClassDomainCtx = IAClass.second.front().second;
+      IAClassDomainCtx =
+          isl_set_union(IAClassDomainCtx, isl_set_copy(DomainCtx));
+      ClassList.push_front(std::make_pair(MA, IAClassDomainCtx));
+      break;
+    }
+
+    if (Consolidated)
+      continue;
+
+    InvariantAccessTy IA = std::make_pair(MA, isl_set_copy(DomainCtx));
+    InvariantAccesses.emplace_back(InvariantAccessClassTy(
+        std::make_pair(PointerSCEV, InvariantAccessListTy({IA}))));
+  }
 
   isl_set_free(DomainCtx);
 }
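The consolidation loop above has a simple core: find the class with the same
pointer key, union the execution contexts, and keep the merged context at the
front of the class list. A compilable sketch of that core, reusing the
stand-in types from the previous note (an unsigned models the context and '|'
models isl_set_union; none of this is Polly's actual API):

    #include <forward_list>
    #include <utility>
    #include <vector>

    using Key = int;       // stand-in for the pointer SCEV
    using Ctx = unsigned;  // stand-in for isl_set *; '|' models isl_set_union
    struct Access {};
    using Entry = std::pair<Access *, Ctx>;
    using Class = std::pair<Key, std::forward_list<Entry>>;

    // Consolidate (A, C) into an existing class with the same location key,
    // or append a fresh class for a location not seen before.
    void consolidate(std::vector<Class> &Classes, Key K, Access *A, Ctx C) {
      for (auto &Cls : Classes) {
        if (Cls.first != K)
          continue;
        if (!Cls.second.empty())
          C |= Cls.second.front().second; // union with the class context
        Cls.second.push_front({A, C});    // front now carries the class context
        return;
      }
      Classes.emplace_back(K, std::forward_list<Entry>{{A, C}});
    }

Keeping the merged context at the front is what later lets the Scop destructor
and printer treat front() as the single owner of the class context.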
@@ -1430,9 +1464,34 @@
   Context = NewContext;
 }
 
+const SCEV *Scop::normalizeInvariantLoadSCEV(const SCEV *S) const {
+  const SCEVUnknown *SU = dyn_cast_or_null<SCEVUnknown>(S);
+  if (!SU)
+    return S;
+
+  LoadInst *LInst = dyn_cast<LoadInst>(SU->getValue());
+  if (!LInst)
+    return S;
+
+  // Try to find an equivalence class for the load, if found return
+  // the SCEV for the representing element, otherwise return S.
+  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+  for (const InvariantAccessClassTy &IAClass : InvariantAccesses) {
+    const SCEV *ClassPointerSCEV = IAClass.first;
+    if (ClassPointerSCEV == PointerSCEV)
+      return ClassPointerSCEV;
+  }
+
+  return S;
+}
+
 void Scop::addParams(std::vector<const SCEV *> NewParameters) {
   for (const SCEV *Parameter : NewParameters) {
     Parameter = extractConstantFactor(Parameter, *SE).second;
+
+    // Normalize invariant loads first to get the correct parameter SCEV.
+    Parameter = normalizeInvariantLoadSCEV(Parameter);
+
     if (ParameterIds.find(Parameter) != ParameterIds.end())
       continue;
 
@@ -1444,6 +1503,9 @@
 }
 
 __isl_give isl_id *Scop::getIdForParam(const SCEV *Parameter) const {
+  // Normalize invariant loads first to get the correct parameter SCEV.
+  Parameter = normalizeInvariantLoadSCEV(Parameter);
+
   ParamIdType::const_iterator IdIter = ParameterIds.find(Parameter);
 
   if (IdIter == ParameterIds.end())
@@ -1521,6 +1583,21 @@
   isl_space_free(Space);
 }
 
+void Scop::buildInvariantAccessClasses(ScopDetection &SD) {
+  const InvariantLoadsSetTy &RIL = *SD.getRequiredInvariantLoads(&getRegion());
+  SmallPtrSet<const SCEV *, 4> ClassPointerSet;
+  for (LoadInst *LInst : RIL) {
+    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
+
+    // Skip the load if we already have an equivalence class for the pointer.
+    if (!ClassPointerSet.insert(PointerSCEV).second)
+      continue;
+
+    InvariantAccesses.emplace_back(InvariantAccessClassTy(
+        std::make_pair(PointerSCEV, InvariantAccessListTy())));
+  }
+}
+
 void Scop::buildContext() {
   isl_space *Space = isl_space_params_alloc(IslCtx, 0);
   Context = isl_set_universe(isl_space_copy(Space));
@@ -2343,6 +2420,7 @@
 
 void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) {
   buildContext();
+  buildInvariantAccessClasses(SD);
 
   buildDomains(&R, LI, SD, DT);
 
@@ -2386,8 +2464,9 @@
     }
   }
 
-  for (const auto &IA : InvariantAccesses)
-    isl_set_free(IA.second);
+  for (const auto &IAClass : InvariantAccesses)
+    if (!IAClass.second.empty())
+      isl_set_free(IAClass.second.front().second);
 }
 
 void Scop::updateAccessDimensionality() {
@@ -2472,8 +2551,9 @@
   if (!InvariantAccesses.empty())
     IsOptimized = true;
 
+  auto &ScopRIL = *SD.getRequiredInvariantLoads(&getRegion());
   // Check required invariant loads that were tagged during SCoP detection.
-  for (LoadInst *LI : *SD.getRequiredInvariantLoads(&getRegion())) {
+  for (LoadInst *LI : ScopRIL) {
     assert(LI && getRegion().contains(LI));
 
     ScopStmt *Stmt = getStmtForBasicBlock(LI->getParent());
     if (Stmt && Stmt->lookupAccessesFor(LI) != nullptr) {
@@ -2496,8 +2576,12 @@
   // we already ordered the accesses such that indirect loads can be resolved,
   // thus we use a stable sort here.
 
-  auto compareInvariantAccesses = [this](const InvariantAccessTy &IA0,
-                                         const InvariantAccessTy &IA1) {
+  auto compareInvariantAccesses = [this](
+      const InvariantAccessClassTy &IAClass0,
+      const InvariantAccessClassTy &IAClass1) {
+    const InvariantAccessTy &IA0 = IAClass0.second.front();
+    const InvariantAccessTy &IA1 = IAClass1.second.front();
+
     Instruction *AI0 = IA0.first->getAccessInstruction();
     Instruction *AI1 = IA1.first->getAccessInstruction();
 
@@ -2724,9 +2808,14 @@
   OS.indent(4) << "Region: " << getNameStr() << "\n";
   OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
   OS.indent(4) << "Invariant Accesses: {\n";
-  for (const auto &IA : InvariantAccesses) {
-    IA.first->print(OS);
-    OS.indent(12) << "Execution Context: " << IA.second << "\n";
+  for (const auto &IAClass : InvariantAccesses) {
+    if (IAClass.second.empty()) {
+      OS.indent(12) << "Class Pointer: " << IAClass.first << "\n";
+    } else {
+      IAClass.second.front().first->print(OS);
+      OS.indent(12) << "Execution Context: " << IAClass.second.front().second
+                    << "\n";
+    }
   }
   OS.indent(4) << "}\n";
   printContext(OS.indent(4));
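Both addParams and getIdForParam funnel every parameter through
normalizeInvariantLoadSCEV, so all loads of one location collapse into a
single parameter named after the class key (the pointer expression); that is
exactly what the test updates below from %tmp/%tmp8/%tmp10 to @bounds reflect.
A stand-alone model of the lookup, with strings standing in for SCEVs and the
hypothetical spelling "load(P)" marking a load from pointer expression P:

    #include <set>
    #include <string>

    // If S is a load from a pointer that has an equivalence class, return the
    // class key (the pointer expression) as the representative; otherwise S.
    std::string normalizeInvariantLoad(const std::set<std::string> &ClassKeys,
                                       const std::string &S) {
      if (S.rfind("load(", 0) != 0 || S.back() != ')')
        return S;                                  // not a load: unchanged
      std::string Ptr = S.substr(5, S.size() - 6); // strip "load(" and ")"
      return ClassKeys.count(Ptr) ? Ptr : S;       // representative element
    }

    // normalizeInvariantLoad({"@bounds"}, "load(@bounds)") == "@bounds"
    // normalizeInvariantLoad({"@bounds"}, "%n")            == "%n"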
Index: lib/CodeGen/IslNodeBuilder.cpp
===================================================================
--- lib/CodeGen/IslNodeBuilder.cpp
+++ lib/CodeGen/IslNodeBuilder.cpp
@@ -592,8 +592,9 @@
   // Values preloaded prior to the SCoP need to be available in the
   // subfunction.
   const auto &InvariantAccesses = S.getInvariantAccesses();
-  for (const InvariantAccessTy &IA : InvariantAccesses)
-    SubtreeValues.insert(ValueMap[IA.first->getAccessInstruction()]);
+  for (const auto &IAClass : InvariantAccesses)
+    for (const InvariantAccessTy &IA : IAClass.second)
+      SubtreeValues.insert(ValueMap[IA.first->getAccessInstruction()]);
 
   ParallelLoopGenerator::ValueToValueMapTy NewValues;
   ParallelLoopGenerator ParallelLoopGen(Builder, P, LI, DT, DL);
@@ -910,8 +911,8 @@
 
 void IslNodeBuilder::preloadInvariantLoads() {
 
-  const auto &InvAccList = S.getInvariantAccesses();
-  if (InvAccList.empty())
+  const auto &InvariantAccesses = S.getInvariantAccesses();
+  if (InvariantAccesses.empty())
     return;
 
   const Region &R = S.getRegion();
@@ -925,14 +926,16 @@
   isl_ast_build *Build =
       isl_ast_build_from_context(isl_set_universe(S.getParamSpace()));
 
-  for (const auto &IA : InvAccList) {
-    MemoryAccess *MA = IA.first;
+  for (const auto &IAClass : InvariantAccesses) {
+
+    MemoryAccess *MA = IAClass.second.front().first;
     assert(!MA->isImplicit());
 
-    isl_set *Domain = isl_set_copy(IA.second);
+    isl_set *Domain = isl_set_copy(IAClass.second.front().second);
     Instruction *AccInst = MA->getAccessInstruction();
     Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build);
-    ValueMap[AccInst] = PreloadVal;
+    for (const InvariantAccessTy &IA : IAClass.second)
+      ValueMap[IA.first->getAccessInstruction()] = PreloadVal;
 
     if (SE.isSCEVable(AccInst->getType())) {
       isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst));
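On the code generation side, one load is emitted per class and every member
instruction is redirected to it, which is also why getReferencesInSubtree
above must now walk all members of each class. A small model of the ValueMap
update in preloadInvariantLoads(), with hypothetical stand-in types and one
int standing in for each emitted preload:

    #include <forward_list>
    #include <map>
    #include <vector>

    struct Inst {};
    using Classes = std::vector<std::forward_list<Inst *>>;

    // Emit one preloaded value per class and map every access instruction in
    // the class to that single value.
    void preloadAll(const Classes &Cs, std::map<Inst *, int> &ValueMap) {
      int PreloadVal = 0;
      for (const auto &Class : Cs) {
        ++PreloadVal;               // stand-in for emitting the real preload
        for (Inst *I : Class)
          ValueMap[I] = PreloadVal; // all members share one value
      }
    }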
Index: test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/OpenMP/invariant_base_pointers_preloaded.ll
@@ -0,0 +1,35 @@
+; RUN: opt %loadPolly -polly-codegen -polly-parallel \
+; RUN:     -polly-parallel-force -polly-detect-unprofitable -S < %s | FileCheck %s
+;
+; Test to verify that we hand down the preloaded A[0] to the OpenMP subfunction.
+;
+;    void f(float *A) {
+;      for (int i = 1; i < 1000; i++)
+;        A[i] += A[0] + A[0];
+;    }
+;
+; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds { float*, float }, { float*, float }* %polly.par.userContext, i32 0, i32 1
+; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %A) {
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = load float, float* %A, align 4
+  %tmp2 = load float, float* %A, align 4
+  %tmpadd = fadd float %tmp, %tmp2
+  %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %tmp1 = load float, float* %arrayidx1, align 4
+  %add = fadd float %tmp2, %tmp1
+  store float %add, float* %arrayidx1, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
Index: test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
===================================================================
--- test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
+++ test/Isl/CodeGen/whole-scop-non-affine-subregion.ll
@@ -2,9 +2,9 @@
 ; RUN:     -polly-codegen -S < %s | FileCheck %s
 
 ; CHECK: polly.start
-
+;    int /* pure */ g()
 ;    void f(int *A) {
-;      if (*A > 42)
+;      if (g())
 ;        *A = *A + 1;
 ;      else
 ;        *A = *A - 1;
@@ -17,22 +17,26 @@
   br label %entry.split
 
 entry.split:
-  %tmp = load i32, i32* %A, align 4
-  %cmp = icmp sgt i32 %tmp, 42
+  %call = call i32 @g()
+  %cmp = icmp eq i32 %call, 0
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:                                          ; preds = %entry
   %tmp1 = load i32, i32* %A, align 4
   %add = add nsw i32 %tmp1, 1
+  store i32 %add, i32* %A, align 4
   br label %if.end
 
 if.else:                                          ; preds = %entry
   %tmp2 = load i32, i32* %A, align 4
   %sub = add nsw i32 %tmp2, -1
+  store i32 %sub, i32* %A, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %storemerge = phi i32 [ %sub, %if.else ], [ %add, %if.then ]
-  store i32 %storemerge, i32* %A, align 4
   ret void
 }
+
+declare i32 @g() #0
+
+attributes #0 = { nounwind readnone }
Index: test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
===================================================================
--- test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
+++ test/ScopInfo/intra_and_inter_bb_scalar_dep.ll
@@ -17,8 +17,8 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK: MemRef_init_ptr[0]
-; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK: MemRef_init_ptr[0]
+; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NOT: MemRef_init_ptr[0]
 ; CHECK: }
 define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 {
 entry:
Index: test/ScopInfo/invariant_loads_complicated_dependences.ll
===================================================================
--- test/ScopInfo/invariant_loads_complicated_dependences.ll
+++ test/ScopInfo/invariant_loads_complicated_dependences.ll
@@ -2,17 +2,17 @@
 ;
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
+; CHECK-NEXT: [LB, UB] -> { Stmt_for_body[i0] -> MemRef_LB[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : }
+; CHECK-NEXT: [LB, UB] -> { Stmt_do_cond[i0, i1] -> MemRef_UB[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : tmp <= 5 }
+; CHECK-NEXT: [LB, UB] -> { Stmt_if_else[i0, i1] -> MemRef_U[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : LB <= 5 }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: [tmp, tmp5] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
-; CHECK-NEXT: Execution Context: [tmp, tmp5] -> { : (tmp5 >= 1 + tmp and tmp5 >= 6) or tmp >= 6 }
+; CHECK-NEXT: [LB, UB] -> { Stmt_if_then[i0, i1] -> MemRef_V[0] };
+; CHECK-NEXT: Execution Context: [LB, UB] -> { : (UB >= 1 + LB and UB >= 6) or LB >= 6 }
 ; CHECK-NEXT: }
 ;
 ;    void f(int *restrict A, int *restrict V, int *restrict U, int *restrict UB,
Index: test/ScopInfo/invariant_loop_bounds.ll
===================================================================
--- test/ScopInfo/invariant_loop_bounds.ll
+++ test/ScopInfo/invariant_loop_bounds.ll
@@ -3,28 +3,28 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[2]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : }
+; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 }
+; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_0 >= 1 }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 }
+; CHECK-NEXT: Execution Context: [p_0, p_1, bounds] -> { : p_1 >= 1 and p_0 >= 1 }
 ; CHECK-NEXT: }
 ;
-; CHECK: p0: %tmp
-; CHECK: p1: %tmp8
-; CHECK: p2: %tmp10
+; CHECK: p0: (8 + @bounds)
+; CHECK: p1: (4 + @bounds)
+; CHECK: p2: @bounds
 ; CHECK: Statements {
 ; CHECK:     Stmt_for_body_6
 ; CHECK:         Domain :=
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 };
+; CHECK:             [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + p_0 and i1 >= 0 and i1 <= -1 + p_1 and i2 >= 0 and i2 <= -1 + bounds };
 ; CHECK:         Schedule :=
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK:             [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
 ; CHECK:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK:             [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK:             [p_0, p_1, bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
 ; CHECK: }
 ;
 ;    int bounds[3];
Index: test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
===================================================================
--- test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
+++ test/ScopInfo/invariant_same_loop_bound_multiple_times-1.ll
@@ -1,46 +1,44 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
 ;
+; Verify that we only have one parameter and one invariant load for all
+; three loads that occur in the region but actually access the same
+; location. Also check that the execution context is the most generic
+; one, i.e., here the universal set.
+;
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: MemRef_bounds[2]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : }
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 }
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 }
+; CHECK-NEXT: Execution Context: [bounds] -> { : }
 ; CHECK-NEXT: }
 ;
-; CHECK: p0: %tmp
-; CHECK: p1: %tmp8
-; CHECK: p2: %tmp10
-; CHECK: Statements {
-; CHECK:     Stmt_for_body_6
-; CHECK:         Domain :=
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 };
-; CHECK:         Schedule :=
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
-; CHECK:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
-; CHECK:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
-; CHECK: }
+; CHECK: p0: @bounds
+; CHECK-NOT: p1
+; CHECK: Statements {
+; CHECK:     Stmt_for_body_6
+; CHECK:         Domain :=
+; CHECK:             [bounds] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK:         Schedule :=
+; CHECK:             [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:             [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:             [bounds] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: }
 ;
-;    int bounds[3];
+;    int bounds[1];
 ;    double data[1024][1024][1024];
 ;
 ;    void foo() {
 ;      int i, j, k;
-;      for (k = 0; k < bounds[2]; k++)
-;        for (j = 0; j < bounds[1]; j++)
+;      for (k = 0; k < bounds[0]; k++)
+;        for (j = 0; j < bounds[0]; j++)
 ;          for (i = 0; i < bounds[0]; i++)
 ;            data[k][j][i] += i + j + k;
 ;    }
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-@bounds = common global [3 x i32] zeroinitializer, align 4
+@bounds = common global [1 x i32] zeroinitializer, align 4
 @data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
 
 define void @foo() {
@@ -49,7 +47,7 @@
 
 for.cond:                                         ; preds = %for.inc.16, %entry
   %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
-  %tmp = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 2), align 4
+  %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
   %tmp7 = sext i32 %tmp to i64
   %cmp = icmp slt i64 %indvars.iv5, %tmp7
   br i1 %cmp, label %for.body, label %for.end.18
@@ -59,7 +57,7 @@
 
 for.cond.1:                                       ; preds = %for.inc.13, %for.body
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
-  %tmp8 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 1), align 4
+  %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
   %tmp9 = sext i32 %tmp8 to i64
   %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
   br i1 %cmp2, label %for.body.3, label %for.end.15
@@ -69,7 +67,7 @@
 
 for.cond.4:                                       ; preds = %for.inc, %for.body.3
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
-  %tmp10 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
   %tmp11 = sext i32 %tmp10 to i64
   %cmp5 = icmp slt i64 %indvars.iv, %tmp11
   br i1 %cmp5, label %for.body.6, label %for.end
Index: test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
===================================================================
--- test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
+++ test/ScopInfo/invariant_same_loop_bound_multiple_times-2.ll
@@ -1,65 +1,66 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
 ;
+; Verify that we only have one parameter and one invariant load for all
+; three loads that occur in the region but actually access the same
+; location. Also check that the execution context is the most generic
+; one, i.e., here the universal set.
+;
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: MemRef_bounds[2]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : }
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp >= 1 }
-; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [tmp, tmp8, tmp10] -> { : tmp8 >= 1 and tmp >= 1 }
+; CHECK-NEXT: Execution Context: [bounds, p] -> { : }
 ; CHECK-NEXT: }
 ;
-; CHECK: p0: %tmp
-; CHECK: p1: %tmp8
-; CHECK: p2: %tmp10
-; CHECK: Statements {
-; CHECK:     Stmt_for_body_6
-; CHECK:         Domain :=
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] : i0 >= 0 and i0 <= -1 + tmp and i1 >= 0 and i1 <= -1 + tmp8 and i2 >= 0 and i2 <= -1 + tmp10 };
-; CHECK:         Schedule :=
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
-; CHECK:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
-; CHECK:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK:             [tmp, tmp8, tmp10] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
-; CHECK: }
+; CHECK: p0: @bounds
+; CHECK: p1: %p
+; CHECK-NOT: p2:
+; CHECK: Statements {
+; CHECK:     Stmt_for_body_6
+; CHECK:         Domain :=
+; CHECK:             [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] : p = 0 and i0 >= 0 and i0 <= -1 + bounds and i1 >= 0 and i1 <= -1 + bounds and i2 >= 0 and i2 <= -1 + bounds };
+; CHECK:         Schedule :=
+; CHECK:             [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> [i0, i1, i2] };
+; CHECK:         ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:             [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK:         MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK:             [bounds, p] -> { Stmt_for_body_6[i0, i1, i2] -> MemRef_data[i0, i1, i2] };
+; CHECK: }
 ;
-;    int bounds[3];
+;    int bounds[1];
 ;    double data[1024][1024][1024];
 ;
-;    void foo() {
+;    void foo(int p) {
 ;      int i, j, k;
-;      for (k = 0; k < bounds[2]; k++)
-;        for (j = 0; j < bounds[1]; j++)
-;          for (i = 0; i < bounds[0]; i++)
-;            data[k][j][i] += i + j + k;
+;      for (k = 0; k < bounds[0]; k++)
+;        if (p == 0)
+;          for (j = 0; j < bounds[0]; j++)
+;            for (i = 0; i < bounds[0]; i++)
+;              data[k][j][i] += i + j + k;
 ;    }
 ;
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-@bounds = common global [3 x i32] zeroinitializer, align 4
+@bounds = common global [1 x i32] zeroinitializer, align 4
 @data = common global [1024 x [1024 x [1024 x double]]] zeroinitializer, align 16
 
-define void @foo() {
+define void @foo(i32 %p) {
 entry:
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc.16, %entry
   %indvars.iv5 = phi i64 [ %indvars.iv.next6, %for.inc.16 ], [ 0, %entry ]
-  %tmp = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 2), align 4
+  %tmp = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
   %tmp7 = sext i32 %tmp to i64
   %cmp = icmp slt i64 %indvars.iv5, %tmp7
   br i1 %cmp, label %for.body, label %for.end.18
 
 for.body:                                         ; preds = %for.cond
-  br label %for.cond.1
+  %cmpp = icmp eq i32 %p, 0
+  br i1 %cmpp, label %for.cond.1, label %for.inc.16
 
 for.cond.1:                                       ; preds = %for.inc.13, %for.body
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.13 ], [ 0, %for.body ]
-  %tmp8 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 1), align 4
+  %tmp8 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
   %tmp9 = sext i32 %tmp8 to i64
   %cmp2 = icmp slt i64 %indvars.iv3, %tmp9
   br i1 %cmp2, label %for.body.3, label %for.end.15
 
@@ -69,7 +70,7 @@
 
 for.cond.4:                                       ; preds = %for.inc, %for.body.3
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.3 ]
-  %tmp10 = load i32, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @bounds, i64 0, i64 0), align 4
+  %tmp10 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @bounds, i64 0, i64 0), align 4
   %tmp11 = sext i32 %tmp10 to i64
   %cmp5 = icmp slt i64 %indvars.iv, %tmp11
   br i1 %cmp5, label %for.body.6, label %for.end
Index: test/ScopInfo/required-invariant-loop-bounds.ll
===================================================================
--- test/ScopInfo/required-invariant-loop-bounds.ll
+++ test/ScopInfo/required-invariant-loop-bounds.ll
@@ -3,10 +3,10 @@
 ; CHECK: Invariant Accesses: {
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[0]
-; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : }
+; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : }
 ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
 ; CHECK-NEXT: MemRef_bounds[1]
-; CHECK-NEXT: Execution Context: [tmp, tmp1] -> { : tmp >= 0 }
+; CHECK-NEXT: Execution Context: [bounds, p_1] -> { : bounds >= 0 }
 ; CHECK: }
 ;
 ;    double A[1000][1000];