Index: lib/Transform/MaximalStaticExpansion.cpp
===================================================================
--- lib/Transform/MaximalStaticExpansion.cpp
+++ lib/Transform/MaximalStaticExpansion.cpp
@@ -76,12 +76,23 @@
 
   /// Expand the read memory access.
   ///
-  /// @param The SCop in which the memory access appears in.
-  /// @param The memory access that need to be expanded.
+  /// @param S The SCop in which the memory access appears in.
+  /// @param MA The memory access that need to be expanded.
   /// @param Dependences The RAW dependences of the SCop.
   /// @param ExpandedSAI The expanded SAI created during write expansion.
   void expandRead(Scop &S, MemoryAccess *MA, isl::union_map &Dependences,
                   ScopArrayInfo *ExpandedSAI);
+
+  /// Filter the dependences to keep only those related to a memory access.
+  ///
+  /// @param S The SCop in which the memory access appears.
+  /// @param Dependences The dependences to filter.
+  /// @param MA The memory access that needs to be expanded.
+  ///
+  /// @return The reversed dependences that involve both the array and the
+  ///         statement accessed by @p MA.
+  isl::union_map filterDependences(Scop &S, const isl::union_map &Dependences,
+                                   MemoryAccess *MA);
 };
 
 } // namespace
@@ -146,6 +157,48 @@
 
 char MaximalStaticExpander::ID = 0;
 
+isl::union_map MaximalStaticExpander::filterDependences(
+    Scop &S, const isl::union_map &Dependences, MemoryAccess *MA) {
+  // The accessed array and the statement of MA do not depend on the map being
+  // visited, so look them up once instead of once per map.
+  const ScopArrayInfo *SAI = MA->getLatestScopArrayInfo();
+  auto AccessDomainId = MA->getAccessRelation().domain().get_tuple_id();
+
+  isl::union_map MapDependences = isl::union_map::empty(S.getParamSpace());
+
+  Dependences.reverse().foreach_map(
+      [&MapDependences, &AccessDomainId, SAI](isl::map Map) -> isl::stat {
+        // Filter out Statement to Statement dependences.
+        if (!Map.can_curry())
+          return isl::stat::ok;
+
+        // Intersect with the relevant SAI.
+        auto TmpMap = Map.curry().range().wrapped_domain_map().curry();
+        auto TmpMapDomainId = TmpMap.domain().get_tuple_id();
+
+        ScopArrayInfo *UserSAI =
+            static_cast<ScopArrayInfo *>(TmpMapDomainId.get_user());
+
+        if (SAI != UserSAI)
+          return isl::stat::ok;
+
+        // Get the correct S1[] -> S2[] dependence.
+        auto NewMap = Map.factor_domain();
+        auto NewMapDomainId = NewMap.domain().get_tuple_id();
+
+        // Skip maps whose domain is not the statement MA belongs to.
+        if (AccessDomainId.keep() != NewMapDomainId.keep())
+          return isl::stat::ok;
+
+        // Add the corresponding map to MapDependences.
+        MapDependences = MapDependences.add_map(NewMap);
+
+        return isl::stat::ok;
+      });
+
+  return MapDependences;
+}
+
 bool MaximalStaticExpander::isExpandable(
     const ScopArrayInfo *SAI, SmallPtrSetImpl<MemoryAccess *> &Writes,
     SmallPtrSetImpl<MemoryAccess *> &Reads, Scop &S,
@@ -153,6 +206,9 @@
 
   int NumberWrites = 0;
   for (ScopStmt &Stmt : S) {
+    auto StmtReads = isl::union_map::empty(S.getParamSpace());
+    auto StmtWrites = isl::union_map::empty(S.getParamSpace());
+
     for (MemoryAccess *MA : Stmt) {
 
       // Check if the current MemoryAccess involved the current SAI.
@@ -166,6 +222,27 @@
         return false;
       }
 
+      // For now, we are not able to expand array where read come after write
+      // (to the same location) in a same statement.
+      auto AccRel = isl::union_map(MA->getAccessRelation());
+      if (MA->isRead()) {
+        // Reject load after store to same location.
+        if (!StmtWrites.is_disjoint(AccRel)) {
+          emitRemark(SAI->getName() + " has read after write to the same "
+                                      "element in same statement. The "
+                                      "dependences found during analysis may "
+                                      "be wrong because Polly is not able to "
+                                      "handle such case for now.",
+                     MA->getAccessInstruction());
+          return false;
+        }
+
+        StmtReads = give(isl_union_map_union(StmtReads.take(), AccRel.take()));
+      } else {
+        StmtWrites =
+            give(isl_union_map_union(StmtWrites.take(), AccRel.take()));
+      }
+
       // For now, we are not able to expand MayWrite.
       if (MA->isMayWrite()) {
         emitRemark(SAI->getName() + " has a maywrite access.",
@@ -191,14 +268,13 @@
         auto StmtDomain = Stmt.getDomain();
 
         // Get the domain of the future Read access.
         auto ReadDomainSet = MA->getAccessRelation().domain();
         auto ReadDomain = isl::union_set(ReadDomainSet);
 
-        auto CurrentReadWriteDependences =
-            Dependences.reverse().intersect_domain(ReadDomain);
-        auto DepsDomain = CurrentReadWriteDependences.domain();
-        unsigned NumberElementMap =
-            isl_union_map_n_map(CurrentReadWriteDependences.get());
+        // Get the dependences relevant for this MA
+        auto MapDependences = filterDependences(S, Dependences, MA);
+        auto DepsDomain = MapDependences.domain();
+        unsigned NumberElementMap = isl_union_map_n_map(MapDependences.get());
 
         // If there are multiple maps in the Deps, we cannot handle this case
         // for now.
@@ -246,17 +322,16 @@
   auto WriteDomainSet = MA->getAccessRelation().domain();
   auto WriteDomain = isl::union_set(WriteDomainSet);
 
-  auto CurrentReadWriteDependences =
-      Dependences.reverse().intersect_domain(WriteDomain);
+  // Get the dependences relevant for this MA
+  auto MapDependences = filterDependences(S, Dependences, MA);
 
   // If no dependences, no need to modify anything.
-  if (CurrentReadWriteDependences.is_empty()) {
+  if (MapDependences.is_empty())
     return;
-  }
 
-  assert(isl_union_map_n_map(CurrentReadWriteDependences.get()) == 1 &&
+  assert(isl_union_map_n_map(MapDependences.get()) == 1 &&
          "There are more than one RAW dependencies in the union map.");
-  auto NewAccessMap = isl::map::from_union_map(CurrentReadWriteDependences);
+  auto NewAccessMap = isl::map::from_union_map(MapDependences);
 
   auto Id = ExpandedSAI->getBasePtrId();
 
@@ -348,7 +423,7 @@
 
   // Get the RAW Dependences.
   auto &DI = getAnalysis<DependenceInfo>();
-  auto &D = DI.getDependences(Dependences::AL_Statement);
+  auto &D = DI.getDependences(Dependences::AL_Reference);
   auto Dependences = isl::give(D.getDependences(Dependences::TYPE_RAW));
 
   SmallPtrSet<ScopArrayInfo *, 4> CurrentSAI(S.arrays().begin(),
Index: test/MaximalStaticExpansion/load_after_store_same_statement.ll
===================================================================
--- /dev/null
+++ test/MaximalStaticExpansion/load_after_store_same_statement.ll
@@ -0,0 +1,68 @@
+; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE
+;
+; Verify that the expansion of an array with load after store in a same statement is not done.
+;
+; Original source code :
+;
+; #define Ni 2000
+; #define Nj 3000
+;
+; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) {
+;   int i,j;
+;   for (i = 0; i < Ni; i++) {
+;     for (int j = 0; j MemRef_C_Stmt_for_body4_expanded[i0, i1] };
+;
+; Check that B is not expanded
+;
+; CHECK-NOT: double MemRef_B_Stmt_for_body4_expanded[10000][10000]; // Element size 8
+; MSE: MemRef_B has read after write to the same element in same statement. The dependences found during analysis may be wrong because Polly is not able to handle such case for now.
+; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B, double* %C, double* %D) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.inc9 + %i.02 = phi i32 [ 0, %entry.split ], [ %inc10, %for.inc9 ] + br label %for.body4 + +for.body4: ; preds = %for.body, %for.body4 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %conv, double* %arrayidx, align 8 + %arrayidx6 = getelementptr inbounds double, double* %B, i64 %indvars.iv + %1 = bitcast double* %arrayidx6 to i64* + %2 = load i64, i64* %1, align 8 + %arrayidx8 = getelementptr inbounds double, double* %C, i64 %indvars.iv + %3 = bitcast double* %arrayidx8 to i64* + store i64 %2, i64* %3, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body4, label %for.inc9 + +for.inc9: ; preds = %for.body4 + %inc10 = add nuw nsw i32 %i.02, 1 + %exitcond3 = icmp ne i32 %inc10, 10000 + br i1 %exitcond3, label %for.body, label %for.end11 + +for.end11: ; preds = %for.inc9 + ret void +} Index: test/MaximalStaticExpansion/read_from_original.ll =================================================================== --- test/MaximalStaticExpansion/read_from_original.ll +++ test/MaximalStaticExpansion/read_from_original.ll @@ -1,5 +1,5 @@ -; RUN: opt %loadPolly -polly-canonicalize -polly-mse -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -polly-canonicalize -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| 
FileCheck %s --check-prefix=MSE ; ; Verify that Polly detects problems and does not expand the array ; @@ -30,76 +30,43 @@ ; ; CHECK-NOT: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] }; ; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded - +; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: noinline nounwind uwtable define double @mse(double* %A, double* %B) { entry: - %A.addr = alloca double*, align 8 - %B.addr = alloca double*, align 8 - %i = alloca i32, align 4 - %tmp = alloca double, align 8 - %j = alloca i32, align 4 - store double* %A, double** %A.addr, align 8 - store double* %B, double** %B.addr, align 8 - store double 6.000000e+00, double* %tmp, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc8, %entry - %0 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %0, 2000 - br i1 %cmp, label %for.body, label %for.end10 + br label %entry.split -for.body: ; preds = %for.cond - store i32 2, i32* %j, align 4 - br label %for.cond1 +entry.split: ; preds = %entry + br label %for.body -for.cond1: ; preds = %for.inc, %for.body - %1 = load i32, i32* %j, align 4 - %cmp2 = icmp slt i32 %1, 3000 - br i1 %cmp2, label %for.body3, label %for.end +for.body: ; preds = %entry.split, %for.end + %indvars.iv4 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next5, %for.end ] + br label %for.body3 -for.body3: ; preds = %for.cond1 - %2 = load i32, i32* %j, align 4 - %conv = sitofp i32 %2 to double - %3 = load double*, double** %B.addr, align 8 - %4 = load i32, i32* %j, align 4 - %sub = sub nsw i32 %4, 1 - %idxprom = sext i32 %sub to i64 - %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom +for.body3: ; preds = %for.body, %for.body3 + %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body3 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %1 = add nsw i64 %indvars.iv, -1 
+ %arrayidx = getelementptr inbounds double, double* %B, i64 %1 store double %conv, double* %arrayidx, align 8 - br label %for.inc + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 3000 + br i1 %exitcond, label %for.body3, label %for.end -for.inc: ; preds = %for.body3 - %5 = load i32, i32* %j, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond1 +for.end: ; preds = %for.body3 + %arrayidx5 = getelementptr inbounds double, double* %B, i64 %indvars.iv4 + %2 = bitcast double* %arrayidx5 to i64* + %3 = load i64, i64* %2, align 8 + %arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv4 + %4 = bitcast double* %arrayidx7 to i64* + store i64 %3, i64* %4, align 8 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + %exitcond6 = icmp ne i64 %indvars.iv.next5, 2000 + br i1 %exitcond6, label %for.body, label %for.end10 -for.end: ; preds = %for.cond1 - %6 = load double*, double** %B.addr, align 8 - %7 = load i32, i32* %i, align 4 - %idxprom4 = sext i32 %7 to i64 - %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 - %8 = load double, double* %arrayidx5, align 8 - %9 = load double*, double** %A.addr, align 8 - %10 = load i32, i32* %i, align 4 - %idxprom6 = sext i32 %10 to i64 - %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 - store double %8, double* %arrayidx7, align 8 - br label %for.inc8 - -for.inc8: ; preds = %for.end - %11 = load i32, i32* %i, align 4 - %inc9 = add nsw i32 %11, 1 - store i32 %inc9, i32* %i, align 4 - br label %for.cond - -for.end10: ; preds = %for.cond - %12 = load double, double* %tmp, align 8 - ret double %12 +for.end10: ; preds = %for.end + ret double 6.000000e+00 } - Index: test/MaximalStaticExpansion/too_many_writes.ll =================================================================== --- test/MaximalStaticExpansion/too_many_writes.ll +++ test/MaximalStaticExpansion/too_many_writes.ll @@ -1,5 +1,5 @@ -; 
RUN: opt %loadPolly -polly-canonicalize -polly-mse -analyze < %s | FileCheck %s -; RUN: opt %loadPolly -polly-canonicalize -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE ; ; Verify that Polly detects problems and does not expand the array ; @@ -33,79 +33,44 @@ ; ; CHECK-NOT: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] }; ; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded - +; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: noinline nounwind uwtable define double @mse(double* %A, double* %B) { entry: - %A.addr = alloca double*, align 8 - %B.addr = alloca double*, align 8 - %i = alloca i32, align 4 - %tmp = alloca double, align 8 - %j = alloca i32, align 4 - store double* %A, double** %A.addr, align 8 - store double* %B, double** %B.addr, align 8 - store double 6.000000e+00, double* %tmp, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond + br label %entry.split -for.cond: ; preds = %for.inc10, %entry - %0 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %0, 2000 - br i1 %cmp, label %for.body, label %for.end12 +entry.split: ; preds = %entry + br label %for.body -for.body: ; preds = %for.cond - %1 = load double*, double** %B.addr, align 8 - %2 = load i32, i32* %i, align 4 - %idxprom = sext i32 %2 to i64 - %arrayidx = getelementptr inbounds double, double* %1, i64 %idxprom +for.body: ; preds = %entry.split, %for.end + %indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ] + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv3 store double 2.000000e+00, double* %arrayidx, align 8 - store i32 0, i32* %j, align 4 - br label %for.cond1 - -for.cond1: ; preds = 
%for.inc, %for.body - %3 = load i32, i32* %j, align 4 - %cmp2 = icmp slt i32 %3, 2000 - br i1 %cmp2, label %for.body3, label %for.end + br label %for.body3 -for.body3: ; preds = %for.cond1 - %4 = load i32, i32* %j, align 4 - %conv = sitofp i32 %4 to double - %5 = load double*, double** %B.addr, align 8 - %6 = load i32, i32* %j, align 4 - %idxprom4 = sext i32 %6 to i64 - %arrayidx5 = getelementptr inbounds double, double* %5, i64 %idxprom4 +for.body3: ; preds = %for.body, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx5 = getelementptr inbounds double, double* %B, i64 %indvars.iv store double %conv, double* %arrayidx5, align 8 - br label %for.inc - -for.inc: ; preds = %for.body3 - %7 = load i32, i32* %j, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond1 - -for.end: ; preds = %for.cond1 - %8 = load double*, double** %B.addr, align 8 - %9 = load i32, i32* %i, align 4 - %idxprom6 = sext i32 %9 to i64 - %arrayidx7 = getelementptr inbounds double, double* %8, i64 %idxprom6 - %10 = load double, double* %arrayidx7, align 8 - %11 = load double*, double** %A.addr, align 8 - %12 = load i32, i32* %i, align 4 - %idxprom8 = sext i32 %12 to i64 - %arrayidx9 = getelementptr inbounds double, double* %11, i64 %idxprom8 - store double %10, double* %arrayidx9, align 8 - br label %for.inc10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 2000 + br i1 %exitcond, label %for.body3, label %for.end -for.inc10: ; preds = %for.end - %13 = load i32, i32* %i, align 4 - %inc11 = add nsw i32 %13, 1 - store i32 %inc11, i32* %i, align 4 - br label %for.cond +for.end: ; preds = %for.body3 + %arrayidx7 = getelementptr inbounds double, double* %B, i64 %indvars.iv3 + %1 = bitcast double* %arrayidx7 to i64* + %2 = load i64, i64* %1, align 8 + %arrayidx9 = getelementptr inbounds double, double* %A, 
i64 %indvars.iv3 + %3 = bitcast double* %arrayidx9 to i64* + store i64 %2, i64* %3, align 8 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next4, 2000 + br i1 %exitcond5, label %for.body, label %for.end12 -for.end12: ; preds = %for.cond - %14 = load double, double* %tmp, align 8 - ret double %14 +for.end12: ; preds = %for.end + ret double 6.000000e+00 } Index: test/MaximalStaticExpansion/working_expansion.ll =================================================================== --- test/MaximalStaticExpansion/working_expansion.ll +++ test/MaximalStaticExpansion/working_expansion.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-canonicalize -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s ; ; Verify that the accesses are correctly expanded ; @@ -27,75 +27,42 @@ ; ; CHECK: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] }; ; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded[i0, i0] }; - +; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function Attrs: noinline nounwind uwtable define double @mse(double* %A, double* %B) { entry: - %A.addr = alloca double*, align 8 - %B.addr = alloca double*, align 8 - %i = alloca i32, align 4 - %tmp = alloca double, align 8 - %j = alloca i32, align 4 - store double* %A, double** %A.addr, align 8 - store double* %B, double** %B.addr, align 8 - store double 6.000000e+00, double* %tmp, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc8, %entry - %0 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %0, 2000 - br i1 %cmp, label %for.body, label %for.end10 + br label %entry.split -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond1 +entry.split: ; preds = %entry + br label %for.body -for.cond1: ; preds = %for.inc, %for.body - %1 = load i32, i32* %j, align 4 - %cmp2 = 
icmp slt i32 %1, 3000 - br i1 %cmp2, label %for.body3, label %for.end +for.body: ; preds = %entry.split, %for.end + %indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ] + br label %for.body3 -for.body3: ; preds = %for.cond1 - %2 = load i32, i32* %j, align 4 - %conv = sitofp i32 %2 to double - %3 = load double*, double** %B.addr, align 8 - %4 = load i32, i32* %j, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom +for.body3: ; preds = %for.body, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv store double %conv, double* %arrayidx, align 8 - br label %for.inc + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 3000 + br i1 %exitcond, label %for.body3, label %for.end -for.inc: ; preds = %for.body3 - %5 = load i32, i32* %j, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond1 +for.end: ; preds = %for.body3 + %arrayidx5 = getelementptr inbounds double, double* %B, i64 %indvars.iv3 + %1 = bitcast double* %arrayidx5 to i64* + %2 = load i64, i64* %1, align 8 + %arrayidx7 = getelementptr inbounds double, double* %A, i64 %indvars.iv3 + %3 = bitcast double* %arrayidx7 to i64* + store i64 %2, i64* %3, align 8 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next4, 2000 + br i1 %exitcond5, label %for.body, label %for.end10 -for.end: ; preds = %for.cond1 - %6 = load double*, double** %B.addr, align 8 - %7 = load i32, i32* %i, align 4 - %idxprom4 = sext i32 %7 to i64 - %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 - %8 = load double, double* %arrayidx5, align 8 - %9 = load double*, double** %A.addr, align 8 - %10 = load i32, i32* %i, align 4 - %idxprom6 = sext i32 %10 to 
i64 - %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 - store double %8, double* %arrayidx7, align 8 - br label %for.inc8 - -for.inc8: ; preds = %for.end - %11 = load i32, i32* %i, align 4 - %inc9 = add nsw i32 %11, 1 - store i32 %inc9, i32* %i, align 4 - br label %for.cond - -for.end10: ; preds = %for.cond - %12 = load double, double* %tmp, align 8 - ret double %12 +for.end10: ; preds = %for.end + ret double 6.000000e+00 } - Index: test/MaximalStaticExpansion/working_expansion_multiple_dependences_per_statement.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/working_expansion_multiple_dependences_per_statement.ll @@ -0,0 +1,95 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; +; Verify that the accesses are correctly expanded +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) { +; int i,j; +; for (j = 0; j < Ni; j++) { +; for (int i = 0; i MemRef_B_Stmt_for_body4_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_body9[i0, i1] -> MemRef_D_Stmt_for_body9_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_end15[i0] -> MemRef_B_Stmt_for_body4_expanded[i0, i0] }; +; CHECK: new: { Stmt_for_end15[i0] -> MemRef_A_Stmt_for_end15_expanded[i0] }; +; CHECK: new: { Stmt_for_end15[i0] -> MemRef_D_Stmt_for_body9_expanded[i0, i0] }; +; CHECK: new: { Stmt_for_end15[i0] -> MemRef_C_Stmt_for_end15_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B, double* %C, double* %D) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.end15 + %indvars.iv7 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next8, %for.end15 ] + br label %for.body4 + +for.body4: ; preds = %for.body, %for.body4 + %indvars.iv = phi i64 [ 0, 
%for.body ], [ %indvars.iv.next, %for.body4 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %conv, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body4, label %for.end + +for.end: ; preds = %for.body4 + br label %for.body9 + +for.body9: ; preds = %for.end, %for.body9 + %indvars.iv4 = phi i64 [ 0, %for.end ], [ %indvars.iv.next5, %for.body9 ] + %1 = trunc i64 %indvars.iv4 to i32 + %conv10 = sitofp i32 %1 to double + %arrayidx12 = getelementptr inbounds double, double* %D, i64 %indvars.iv4 + store double %conv10, double* %arrayidx12, align 8 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + %exitcond6 = icmp ne i64 %indvars.iv.next5, 10000 + br i1 %exitcond6, label %for.body9, label %for.end15 + +for.end15: ; preds = %for.body9 + %arrayidx17 = getelementptr inbounds double, double* %B, i64 %indvars.iv7 + %2 = bitcast double* %arrayidx17 to i64* + %3 = load i64, i64* %2, align 8 + %arrayidx19 = getelementptr inbounds double, double* %A, i64 %indvars.iv7 + %4 = bitcast double* %arrayidx19 to i64* + store i64 %3, i64* %4, align 8 + %arrayidx21 = getelementptr inbounds double, double* %D, i64 %indvars.iv7 + %5 = bitcast double* %arrayidx21 to i64* + %6 = load i64, i64* %5, align 8 + %arrayidx23 = getelementptr inbounds double, double* %C, i64 %indvars.iv7 + %7 = bitcast double* %arrayidx23 to i64* + store i64 %6, i64* %7, align 8 + %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 + %exitcond9 = icmp ne i64 %indvars.iv.next8, 10000 + br i1 %exitcond9, label %for.body, label %for.end26 + +for.end26: ; preds = %for.end15 + ret void +} Index: test/MaximalStaticExpansion/working_expansion_multiple_instruction_per_statement.ll =================================================================== --- /dev/null +++ 
test/MaximalStaticExpansion/working_expansion_multiple_instruction_per_statement.ll @@ -0,0 +1,84 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; +; Verify that the accesses are correctly expanded +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; void mse(double A[Ni], double B[Nj], double C[Nj], double D[Nj]) { +; int i,j; +; for (j = 0; j < Nj; j++) { +; for (int i = 0; i MemRef_B_Stmt_for_body4_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_body4[i0, i1] -> MemRef_D_Stmt_for_body4_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body4_expanded[i0, i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_A_Stmt_for_end_expanded[i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_D_Stmt_for_body4_expanded[i0, i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_C_Stmt_for_end_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B, double* %C, double* %D) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.end + %indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ] + br label %for.body4 + +for.body4: ; preds = %for.body, %for.body4 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body4 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %conv, double* %arrayidx, align 8 + %1 = trunc i64 %indvars.iv to i32 + %conv5 = sitofp i32 %1 to double + %arrayidx7 = getelementptr inbounds double, double* %D, i64 %indvars.iv + store double %conv5, double* %arrayidx7, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body4, label %for.end + +for.end: ; preds = %for.body4 + %arrayidx9 = 
getelementptr inbounds double, double* %B, i64 %indvars.iv3 + %2 = bitcast double* %arrayidx9 to i64* + %3 = load i64, i64* %2, align 8 + %arrayidx11 = getelementptr inbounds double, double* %A, i64 %indvars.iv3 + %4 = bitcast double* %arrayidx11 to i64* + store i64 %3, i64* %4, align 8 + %arrayidx13 = getelementptr inbounds double, double* %D, i64 %indvars.iv3 + %5 = bitcast double* %arrayidx13 to i64* + %6 = load i64, i64* %5, align 8 + %arrayidx15 = getelementptr inbounds double, double* %C, i64 %indvars.iv3 + %7 = bitcast double* %arrayidx15 to i64* + store i64 %6, i64* %7, align 8 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next4, 10000 + br i1 %exitcond5, label %for.body, label %for.end18 + +for.end18: ; preds = %for.end + ret void +}