Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -367,6 +367,16 @@ /// If the array is read only bool isReadOnly(); + /// Verify that @p Array is compatible with this ScopArrayInfo. + /// + /// Two arrays are compatible if their dimensionality, the sizes of their + /// dimensions, and their element types match. + /// + /// @param Array The array to compare against. + /// + /// @returns True, if the arrays are compatible, False otherwise. + bool isCompatibleWith(const ScopArrayInfo *Array) const; + private: void addDerivedSAI(ScopArrayInfo *DerivedSAI) { DerivedSAIs.insert(DerivedSAI); @@ -804,22 +814,14 @@ /// Get an isl string representing a new access function, if available. std::string getNewAccessRelationStr() const; - /// Get the base address of this access (e.g. A for A[i+j]) when + /// Get the original base address of this access (e.g. A for A[i+j]) when /// detected. - Value *getOriginalBaseAddr() const { - assert(!getOriginalScopArrayInfo() /* may noy yet be initialized */ || - getOriginalScopArrayInfo()->getBasePtr() == BaseAddr); - return BaseAddr; - } - - /// Get the base address of this access (e.g. A for A[i+j]) after a - /// potential change by setNewAccessRelation(). - Value *getLatestBaseAddr() const { - return getLatestScopArrayInfo()->getBasePtr(); - } - - /// Old name for getOriginalBaseAddr(). - Value *getBaseAddr() const { return getOriginalBaseAddr(); } + /// + /// This address may differ from the base address referenced by the Original + /// ScopArrayInfo to which this array belongs, as this memory access may + /// have been unified to a ScopArray which has a different but identically + /// valued base pointer in case invariant load hoisting is enabled. + Value *getOriginalBaseAddr() const { return BaseAddr; } /// Get the detection-time base array isl_id for this access. 
__isl_give isl_id *getOriginalArrayId() const; @@ -1890,6 +1892,34 @@ /// void hoistInvariantLoads(); + /// Canonicalize arrays with base pointers from the same equivalence class. + /// + /// Some context: in our normal model we assume that each base pointer is + /// related to a single specific memory region, where memory regions + /// associated with different base pointers are disjoint. Consequently we do + /// not need to compute additional data dependences that model possible + /// overlaps of these memory regions. To verify our assumption we compute + /// alias checks that verify that modeled arrays indeed do not overlap. In + /// case an overlap is detected the runtime check fails and we fall back to + /// the original code. + /// + /// In case of arrays where the base pointers are known to be identical, + /// because they are dynamically loaded by accesses that are in the same + /// invariant load equivalence class, such run-time alias check would always + /// be false. + /// + /// This function makes sure that we do not generate consistently failing + /// run-time checks for code that contains distinct arrays with known + /// equivalent base pointers. It identifies for each invariant load + /// equivalence class a single canonical array and canonicalizes all memory + /// accesses that reference arrays that have base pointers that are known to + /// be equal to the base pointer of such a canonical array to this canonical + /// array. + /// + /// We currently do not canonicalize arrays for which certain memory accesses + /// have been hoisted as loop invariant. + void canonicalizeDynamicBasePtrs(); + /// Add invariant loads listed in @p InvMAs with the domain of @p Stmt. void addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs); @@ -2483,6 +2513,18 @@ /// /// @param BasePtr The base pointer the object has been stored for. /// @param Kind The kind of array info object. 
+ /// + /// @returns The ScopArrayInfo pointer or NULL if no such pointer is + /// available. + const ScopArrayInfo *getScopArrayInfoOrNull(Value *BasePtr, MemoryKind Kind); + + /// Return the cached ScopArrayInfo object for @p BasePtr. + /// + /// @param BasePtr The base pointer the object has been stored for. + /// @param Kind The kind of array info object. + /// + /// @returns The ScopArrayInfo pointer (may assert if no such pointer is + /// available). const ScopArrayInfo *getScopArrayInfo(Value *BasePtr, MemoryKind Kind); /// Invalidate ScopArrayInfo object for base address. Index: polly/trunk/lib/Analysis/ScopBuilder.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopBuilder.cpp +++ polly/trunk/lib/Analysis/ScopBuilder.cpp @@ -776,6 +776,7 @@ return; scop->hoistInvariantLoads(); + scop->canonicalizeDynamicBasePtrs(); scop->verifyInvariantLoads(); scop->simplifySCoP(true); Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -279,6 +279,20 @@ return IsReadOnly; } +bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const { + if (Array->getElementType() != getElementType()) + return false; + + if (Array->getNumberOfDimensions() != getNumberOfDimensions()) + return false; + + for (unsigned i = 0; i < getNumberOfDimensions(); i++) + if (Array->getDimensionSize(i) != getDimensionSize(i)) + return false; + + return true; +} + void ScopArrayInfo::updateElementType(Type *NewElementType) { if (NewElementType == ElementType) return; @@ -3765,6 +3779,76 @@ isl_union_map_free(Writes); } +/// Find the canonical scop array info object for a set of invariant load +/// hoisted loads. The canonical array is the one that corresponds to the +/// first load in the list of accesses which is used as base pointer of a +/// scop array. 
+static const ScopArrayInfo *findCanonicalArray(Scop *S, + MemoryAccessList &Accesses) { + for (MemoryAccess *Access : Accesses) { + const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull( + Access->getAccessInstruction(), MemoryKind::Array); + if (CanonicalArray) + return CanonicalArray; + } + return nullptr; +} + +/// Check if @p Array serves as base array in an invariant load. +static bool isUsedForIndirectHoistedLoad(Scop *S, const ScopArrayInfo *Array) { + for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses()) + for (MemoryAccess *Access2 : EqClass2.InvariantAccesses) + if (Access2->getScopArrayInfo() == Array) + return true; + return false; +} + +/// Replace the base pointer arrays in all memory accesses referencing @p Old, +/// with a reference to @p New. +static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old, + const ScopArrayInfo *New) { + for (ScopStmt &Stmt : *S) + for (MemoryAccess *Access : Stmt) { + if (Access->getLatestScopArrayInfo() != Old) + continue; + + isl_id *Id = New->getBasePtrId(); + isl_map *Map = Access->getAccessRelation(); + Map = isl_map_set_tuple_id(Map, isl_dim_out, Id); + Access->setAccessRelation(Map); + } +} + +void Scop::canonicalizeDynamicBasePtrs() { + for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) { + MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses; + + const ScopArrayInfo *CanonicalBasePtrSAI = + findCanonicalArray(this, BasePtrAccesses); + + if (!CanonicalBasePtrSAI) + continue; + + for (MemoryAccess *BasePtrAccess : BasePtrAccesses) { + const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull( + BasePtrAccess->getAccessInstruction(), MemoryKind::Array); + if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI || + !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI)) + continue; + + // we currently do not canonicalize arrays where some accesses are + // hoisted as invariant loads. If we would, we need to update the access + // function of the invariant loads as well. 
However, as this is not a + // very common situation, we leave this for now to avoid further + // complexity increases. + if (isUsedForIndirectHoistedLoad(this, BasePtrSAI)) + continue; + + replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI); + } + } +} + const ScopArrayInfo * Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, ArrayRef Sizes, MemoryKind Kind, @@ -3806,8 +3890,14 @@ return SAI; } -const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) { +const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr, + MemoryKind Kind) { auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get(); + return SAI; +} + +const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) { + auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind); assert(SAI && "No ScopArrayInfo available for this base pointer"); return SAI; } Index: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp +++ polly/trunk/lib/CodeGen/IslNodeBuilder.cpp @@ -1229,7 +1229,7 @@ // current SAI could be the base pointer of the derived SAI, however we // should only change the base pointer of the derived SAI if we actually // preloaded it. 
- if (BasePtr == MA->getBaseAddr()) { + if (BasePtr == MA->getOriginalBaseAddr()) { assert(BasePtr->getType() == PreloadVal->getType()); DerivedSAI->setBasePtr(PreloadVal); } Index: polly/trunk/lib/Transform/ScheduleOptimizer.cpp =================================================================== --- polly/trunk/lib/Transform/ScheduleOptimizer.cpp +++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp @@ -1304,7 +1304,8 @@ __isl_take isl_schedule_node *Node, const llvm::TargetTransformInfo *TTI, MatMulInfoTy &MMI) { assert(TTI && "The target transform info should be provided."); - Node = markInterIterationAliasFree(Node, MMI.WriteToC->getLatestBaseAddr()); + Node = markInterIterationAliasFree( + Node, MMI.WriteToC->getLatestScopArrayInfo()->getBasePtr()); int DimOutNum = isl_schedule_node_band_n_member(Node); assert(DimOutNum > 2 && "In case of the matrix multiplication the loop nest " "and, consequently, the corresponding scheduling " Index: polly/trunk/test/Isl/CodeGen/invariant_load_canonicalize_array_baseptrs.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/invariant_load_canonicalize_array_baseptrs.ll +++ polly/trunk/test/Isl/CodeGen/invariant_load_canonicalize_array_baseptrs.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; CHECK: %polly.access.A = getelementptr float*, float** %A, i64 0 +; CHECK: %polly.access.A.load = load float*, float** %polly.access.A +; CHECK: store float 4.200000e+01, float* %polly.access.A.load +; CHECK: store float 4.800000e+01, float* %polly.access.A.load + +define void @foo(float** %A) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseA = load float*, float** %A + store float 42.0, float* %baseA + br label %body2 + 
+body2: + %baseB = load float*, float** %A + store float 48.0, float* %baseB + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; CHECK: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body1[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body1[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_baseB[0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body2[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body2[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_baseB[0] }; + +define void @foo(float** %A) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseA = load float*, float** %A + store float 42.0, float* %baseA + br label %body2 + +body2: + %baseB = load float*, float** %A + store float 42.0, float* %baseB + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_2.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_2.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_2.ll @@ -0,0 +1,91 @@ +; RUN: 
opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; Make sure we choose a canonical element that is not the first invariant load, +; but the first that is an array base pointer. + +; CHECK: Statements { +; CHECK-NEXT: Stmt_body0 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body0[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body0[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body0[i0] -> MemRef_X[0] }; +; CHECK-NEXT: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body1[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body1[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_baseB[0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body2[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body2[i0] -> [i0, 2] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_X[0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_ptr[] }; +; CHECK-NEXT: Stmt_body3 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body3[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body3[i0] -> [i0, 3] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body3[i0] -> MemRef_baseB[0] }; +; CHECK-NEXT: Stmt_body4 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body4[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body4[i0] -> [i0, 4] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body4[i0] -> MemRef_X[0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: { Stmt_body4[i0] -> MemRef_ptr[] }; +; CHECK-NEXT: } 
+ +define void @foo(float** %A, float** %X) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body0, label %exit + +body0: + %ptr = load float*, float** %A + store float* %ptr, float** %X + br label %body1 + +body1: + %baseA = load float*, float** %A + store float 42.0, float* %baseA + br label %body2 + +body2: + %ptr2 = load float*, float** %A + store float* %ptr, float** %X + br label %body3 + +body3: + %baseB = load float*, float** %A + store float 42.0, float* %baseB + br label %body4 + +body4: + %ptr3 = load float*, float** %A + store float* %ptr, float** %X + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_3.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_3.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_3.ll @@ -0,0 +1,58 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; Verify that we canonicalize accesses even though one of the accesses (even +; the canonical base) has a partial execution context. This is correct as +; the combined execution context still covers both accesses. 
+ +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: } + +; CHECK: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body1[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body1[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_baseB[0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body2[i0] : 0 <= i0 <= 510 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body2[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_baseB[0] }; + + +define void @foo(float** %A) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseA = load float*, float** %A + store float 42.0, float* %baseA + %cmp = icmp slt i64 %indvar.next, 512 + br i1 %cmp, label %body2, label %latch + +body2: + %baseB = load float*, float** %A + store float 42.0, float* %baseB + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4.ll @@ -0,0 +1,54 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; Verify that a delinearized and a not delinearized access are not +; canonicalized. 
+ +; CHECK: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_body1[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body1[i0] -> MemRef_baseB[0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [n] -> { Stmt_body2[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [n] -> { Stmt_body2[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body2[i0] -> MemRef_baseA[i0, i0] }; +; CHECK-NEXT: } + + +define void @foo(float** %A, i64 %n, i64 %m) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseB = load float*, float** %A + store float 42.0, float* %baseB + br label %body2 + +body2: + %baseA = load float*, float** %A + %offsetA = mul i64 %indvar, %n + %offsetA2 = add i64 %offsetA, %indvar + %ptrA = getelementptr float, float* %baseA, i64 %offsetA2 + store float 42.0, float* %ptrA + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4b.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4b.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4b.ll @@ -0,0 +1,55 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; Verify that two arrays delinearized with different sizes are not coalesced. 
+ +; CHECK: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [m, n] -> { Stmt_body1[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [m, n] -> { Stmt_body1[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [m, n] -> { Stmt_body1[i0] -> MemRef_baseB[i0, i0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: [m, n] -> { Stmt_body2[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [m, n] -> { Stmt_body2[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [m, n] -> { Stmt_body2[i0] -> MemRef_baseA[i0, i0] }; +; CHECK-NEXT: } + +define void @foo(float** %A, i64 %n, i64 %m) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseB = load float*, float** %A + %offsetB = mul i64 %indvar, %m + %offsetB2 = add i64 %offsetB, %indvar + %ptrB = getelementptr float, float* %baseB, i64 %offsetB2 + store float 42.0, float* %ptrB + br label %body2 + +body2: + %baseA = load float*, float** %A + %offsetA = mul i64 %indvar, %n + %offsetA2 = add i64 %offsetA, %indvar + %ptrA = getelementptr float, float* %baseA, i64 %offsetA2 + store float 42.0, float* %ptrA + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4c.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4c.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_4c.ll @@ -0,0 +1,51 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; Verify that arrays with different element types are not coalesced. 
+ +; CHECK: Statements { +; CHECK-NEXT: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body1[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body1[i0] -> [i0, 0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_baseB[0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body2[i0] : 0 <= i0 <= 1022 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body2[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_baseA[0] }; +; CHECK-NEXT: } + +define void @foo(float** %A, i64 %n, i64 %m) { +start: + br label %loop + +loop: + %indvar = phi i64 [0, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseB = load float*, float** %A + store float 42.0, float* %baseB + br label %body2 + +body2: + %baseA = load float*, float** %A + %ptrcast = bitcast float* %baseA to i64* + store i64 42, i64* %ptrcast + br label %latch + +latch: + br label %loop + +exit: + ret void + +} Index: polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_5.ll =================================================================== --- polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_5.ll +++ polly/trunk/test/ScopInfo/invariant_load_canonicalize_array_baseptrs_5.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s \ +; RUN: -polly-invariant-load-hoisting \ +; RUN: | FileCheck %s + +; Verify that nested arrays with invariant base pointers are handled correctly. +; Specifically, we currently do not canonicalize arrays where some accesses are +; hoisted as invariant loads. If we would, we need to update the access function +; of the invariant loads as well. 
However, as this is not a very common +; situation, we leave this for now to avoid further complexity increases. +; +; In this test case the arrays baseA1 and baseA2 could be canonicalized to a +; single array, but there is also an invariant access to baseA1[0] through +; "%v0 = load float, float* %ptr" which prevents the canonicalization. + +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_baseA1[0] }; +; CHECK-NEXT: Execution Context: { : } +; CHECK-NEXT: } + +; CHECK: Statements { +; CHECK-NEXT: Stmt_body1 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body1[i0] : 0 <= i0 <= 1021 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body1[i0] -> [i0, 0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_baseA1[1 + i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_B[0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body1[i0] -> MemRef_B[0] }; +; CHECK-NEXT: Stmt_body2 +; CHECK-NEXT: Domain := +; CHECK-NEXT: { Stmt_body2[i0] : 0 <= i0 <= 1021 }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: { Stmt_body2[i0] -> [i0, 1] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_body2[i0] -> MemRef_baseA2[0] }; +; CHECK-NEXT: } + +define void @foo(float** %A, float* %B) { +start: + br label %loop + +loop: + %indvar = phi i64 [1, %start], [%indvar.next, %latch] + %indvar.next = add nsw i64 %indvar, 1 + %icmp = icmp slt i64 %indvar.next, 1024 + br i1 %icmp, label %body1, label %exit + +body1: + %baseA1 = load float*, float** %A + %ptr = getelementptr inbounds float, float* %baseA1, i64 %indvar + %v0 = load float, float* %ptr + %v1 = 
load float, float* %baseA1 + store float %v0, float* %B + store float %v1, float* %B + br label %body2 + +body2: + %baseA2 = load float*, float** %A + store float undef, float* %baseA2 + br label %body3 + +body3: + br label %latch + +latch: + br label %loop + +exit: + ret void + +}