Index: include/polly/LinkAllPasses.h
===================================================================
--- include/polly/LinkAllPasses.h
+++ include/polly/LinkAllPasses.h
@@ -55,6 +55,7 @@
 llvm::Pass *createIslScheduleOptimizerPass();
 llvm::Pass *createFlattenSchedulePass();
 llvm::Pass *createDeLICMPass();
+llvm::Pass *createMaximalStaticExpansionPass();
 
 extern char &CodePreparationID;
 } // namespace polly
@@ -88,6 +89,7 @@
     polly::createPPCGCodeGenerationPass();
 #endif
     polly::createIslScheduleOptimizerPass();
+    polly::createMaximalStaticExpansionPass();
     polly::createFlattenSchedulePass();
     polly::createDeLICMPass();
     polly::createDumpModulePass("", true);
@@ -109,6 +111,7 @@
 void initializePPCGCodeGenerationPass(llvm::PassRegistry &);
 #endif
 void initializeIslScheduleOptimizerPass(llvm::PassRegistry &);
+void initializeMaximalStaticExpanderPass(llvm::PassRegistry &);
 void initializePollyCanonicalizePass(llvm::PassRegistry &);
 void initializeFlattenSchedulePass(llvm::PassRegistry &);
 void initializeDeLICMPass(llvm::PassRegistry &);
Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -60,6 +60,7 @@
   Transform/FlattenAlgo.cpp
   Transform/DeLICM.cpp
   Transform/Simplify.cpp
+  Transform/MaximalStaticExpansion.cpp
   ${POLLY_HEADER_FILES}
   )
 set_target_properties(PollyCore PROPERTIES FOLDER "Polly")
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -140,6 +140,11 @@
     cl::desc("Import the polyhedral description of the detected Scops"),
     cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
 
+static cl::opt<bool> FullyIndexedStaticExpansion(
+    "polly-mse",
+    cl::desc("Fully expand the memory accesses of the detected Scops"),
+    cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 static cl::opt<bool> ExportJScop(
     "polly-export",
     cl::desc("Export the polyhedral description of the detected Scops"),
@@ -237,6 +242,7 @@
   initializeDependenceInfoWrapperPassPass(Registry);
   initializeJSONExporterPass(Registry);
   initializeJSONImporterPass(Registry);
+  initializeMaximalStaticExpanderPass(Registry);
   initializeIslAstInfoWrapperPassPass(Registry);
   initializeIslScheduleOptimizerPass(Registry);
   initializePollyCanonicalizePass(Registry);
@@ -313,6 +319,9 @@
   if (DeadCodeElim)
     PM.add(polly::createDeadCodeElimPass());
 
+  if (FullyIndexedStaticExpansion)
+    PM.add(polly::createMaximalStaticExpansionPass());
+
   if (EnablePruneUnprofitable)
     PM.add(polly::createPruneUnprofitablePass());
 
Index: lib/Transform/MaximalStaticExpansion.cpp
===================================================================
--- /dev/null
+++ lib/Transform/MaximalStaticExpansion.cpp
@@ -0,0 +1,397 @@
+//===---------------- MaximalStaticExpansion.cpp -------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass fully expands the memory accesses of a Scop to get rid of
+// dependencies.
+//
+//===----------------------------------------------------------------------===//
+
+#include "polly/DependenceInfo.h"
+#include "polly/FlattenAlgo.h"
+#include "polly/LinkAllPasses.h"
+#include "polly/Options.h"
+#include "polly/ScopInfo.h"
+#include "polly/Support/GICHelper.h"
+#include "polly/Support/ISLOStream.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+using namespace polly;
+
+#define DEBUG_TYPE "polly-mse"
+
+namespace {
+/// Pass that expands the arrays of a SCoP: the single write to an array is
+/// redirected to a fresh array in which each write goes to its own cell.
+class MaximalStaticExpander : public ScopPass {
+public:
+  static char ID;
+  explicit MaximalStaticExpander() : ScopPass(ID) {}
+
+  ~MaximalStaticExpander() {}
+
+  /// Expand the accesses of the SCoP.
+  ///
+  /// @param S The SCoP that must be expanded.
+  bool runOnScop(Scop &S) override;
+
+  /// Print the SCoP.
+  ///
+  /// @param OS The stream where to print.
+  /// @param S The SCoP that must be printed.
+  void printScop(raw_ostream &OS, Scop &S) const override;
+
+  /// Register all analyses and transformations required.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+private:
+  /// OptimizationRemarkEmitter object for displaying diagnostic remarks.
+  OptimizationRemarkEmitter *ORE = nullptr;
+
+  /// Emit an analysis remark at the terminator of the entering block of @p S.
+  ///
+  /// @param S The SCoP the remark refers to.
+  /// @param Msg The text of the remark.
+  void emitRemark(Scop &S, std::string Msg);
+
+  /// Return true if the SAI in parameter is expandable.
+  ///
+  /// @param SAI The SAI that needs to be checked.
+  /// @param Writes A set that will contain all the write accesses.
+  /// @param Reads A set that will contain all the read accesses.
+  /// @param S The SCoP the SAI belongs to.
+  /// @param Dependences The RAW dependences of the SCoP.
+  bool isExpandable(const ScopArrayInfo *SAI,
+                    SmallPtrSet<MemoryAccess *, 4> &Writes,
+                    SmallPtrSet<MemoryAccess *, 4> &Reads, Scop &S,
+                    isl::union_map &Dependences);
+
+  /// Expand a write memory access.
+  ///
+  /// @param S The SCoP in which the memory access appears.
+  /// @param MA The memory access that needs to be expanded.
+  ScopArrayInfo *expandWrite(Scop &S, MemoryAccess *MA);
+
+  /// Expand the read memory access.
+  ///
+  /// @param S The SCoP in which the memory access appears.
+  /// @param MA The memory access that needs to be expanded.
+  /// @param Dependences The RAW dependences of the SCoP.
+  /// @param ExpandedSAI The expanded SAI created during write expansion.
+  void expandRead(Scop &S, MemoryAccess *MA, isl::union_map &Dependences,
+                  ScopArrayInfo *ExpandedSAI);
+};
+} // namespace
+
+namespace {
+
+#ifndef NDEBUG
+/// Whether a dimension of a set is bounded (lower and upper) by a constant,
+/// i.e. there are two constants Min and Max, such that every value x of the
+/// chosen dimensions is Min <= x <= Max.
+bool isDimBoundedByConstant(isl::set Set, unsigned dim) {
+  auto ParamDims = Set.dim(isl::dim::param);
+  Set = Set.project_out(isl::dim::param, 0, ParamDims);
+  Set = Set.project_out(isl::dim::set, 0, dim);
+  auto SetDims = Set.dim(isl::dim::set);
+  Set = Set.project_out(isl::dim::set, 1, SetDims - 1);
+  return bool(Set.is_bounded());
+}
+#endif
+
+/// If @p PwAff maps to a constant, return said constant. If @p Max/@p Min, it
+/// can also be a piecewise constant and it would return the minimum/maximum
+/// value. Otherwise, return NaN.
+isl::val getConstant(isl::pw_aff PwAff, bool Max, bool Min) {
+  assert(!Max || !Min);
+  isl::val Result;
+  PwAff.foreach_piece([=, &Result](isl::set Set, isl::aff Aff) -> isl::stat {
+    if (Result && Result.is_nan())
+      return isl::stat::ok;
+
+    // TODO: If Min/Max, we can also determine a minimum/maximum value if
+    // Set is constant-bounded.
+    if (!Aff.is_cst()) {
+      Result = isl::val::nan(Aff.get_ctx());
+      return isl::stat::error;
+    }
+
+    auto ThisVal = Aff.get_constant_val();
+    if (!Result) {
+      Result = ThisVal;
+      return isl::stat::ok;
+    }
+
+    if (Result.eq(ThisVal))
+      return isl::stat::ok;
+
+    if (Max && ThisVal.gt(Result)) {
+      Result = ThisVal;
+      return isl::stat::ok;
+    }
+
+    if (Min && ThisVal.lt(Result)) {
+      Result = ThisVal;
+      return isl::stat::ok;
+    }
+
+    // Not compatible
+    Result = isl::val::nan(Aff.get_ctx());
+    return isl::stat::error;
+  });
+  return Result;
+}
+
+} // namespace
+
+char MaximalStaticExpander::ID = 0;
+
+bool MaximalStaticExpander::isExpandable(const ScopArrayInfo *SAI,
+                                         SmallPtrSet<MemoryAccess *, 4> &Writes,
+                                         SmallPtrSet<MemoryAccess *, 4> &Reads,
+                                         Scop &S, isl::union_map &Dependences) {
+
+  int NumberWrites = 0;
+  for (ScopStmt &Stmt : S) {
+    for (MemoryAccess *MA : Stmt) {
+
+      // Check if the current MemoryAccess involves the current SAI.
+      if (SAI != MA->getLatestScopArrayInfo())
+        continue;
+
+      // For now, we are not able to expand Scalar.
+      if (MA->isLatestScalarKind()) {
+        errs() << "MSE ERROR : " << SAI->getName() << " is a Scalar access. \n";
+        return false;
+      }
+
+      // For now, we are not able to expand MayWrite.
+      if (MA->isMayWrite()) {
+        errs() << "MSE ERROR : " << SAI->getName()
+               << " has a maywrite access. \n";
+        return false;
+      }
+
+      // For now, we are not able to expand SAI with more than one write.
+      if (MA->isMustWrite()) {
+        Writes.insert(MA);
+        NumberWrites++;
+        if (NumberWrites > 1) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " has more than 1 write access. \n";
+          return false;
+        }
+      }
+
+      // Check if it is possible to expand this read.
+      if (MA->isRead()) {
+
+        // Get the domain of the current ScopStmt.
+        auto StmtDomain = isl::give(Stmt.getDomain());
+
+        // Get the domain of the future Read access.
+        auto ReadDomainSet = isl::give(isl_map_domain(MA->getAccessRelation()));
+        auto ReadDomain = isl::union_set(ReadDomainSet);
+        auto CurrentReadWriteDependences =
+            Dependences.reverse().intersect_domain(ReadDomain);
+        auto DepsDomain = CurrentReadWriteDependences.domain();
+
+        unsigned NumberElementMap =
+            isl_union_map_n_map(CurrentReadWriteDependences.get());
+
+        // If there are multiple maps in the Deps, we cannot handle this case
+        // for now.
+        if (NumberElementMap != 1) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " has too many dependences to be handle for now. \n";
+          return false;
+        }
+
+        auto DepsDomainSet = isl::set(DepsDomain);
+
+        // Partial read accesses are not handled by Polly.
+        if (!StmtDomain.is_subset(DepsDomainSet)) {
+          errs() << "MSE ERROR : " << SAI->getName()
+                 << " expansion leads to a partial read access. \n";
+          return false;
+        }
+
+        Reads.insert(MA);
+      }
+    }
+  }
+
+  // No need to expand SAI with no write.
+  if (NumberWrites == 0) {
+    errs() << "MSE ERROR : " << SAI->getName() << " has 0 write access. \n";
+    return false;
+  }
+
+  return true;
+}
+
+void MaximalStaticExpander::expandRead(Scop &S, MemoryAccess *MA,
+                                       isl::union_map &Dependences,
+                                       ScopArrayInfo *ExpandedSAI) {
+
+  // Restrict the reversed RAW dependences to the statement instances that
+  // execute this read access.
+  auto ReadDomainSet = isl::give(isl_map_domain(MA->getAccessRelation()));
+  auto ReadDomain = isl::union_set(ReadDomainSet);
+
+  auto CurrentReadWriteDependences =
+      Dependences.reverse().intersect_domain(ReadDomain);
+
+  // If no dependences, no need to modify anything.
+  if (CurrentReadWriteDependences.is_empty()) {
+    return;
+  }
+
+  auto NewAccessMap = isl::map::from_union_map(CurrentReadWriteDependences);
+
+  auto Id = isl::give(ExpandedSAI->getBasePtrId());
+
+  // Replace the out tuple id with the one of the access array.
+  NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, Id);
+
+  // Set the new access relation.
+  MA->setNewAccessRelation(NewAccessMap.copy());
+}
+
+ScopArrayInfo *MaximalStaticExpander::expandWrite(Scop &S, MemoryAccess *MA) {
+
+  // Get the current AM.
+  auto CurrentAccessMap = isl::give(MA->getAccessRelation());
+
+  unsigned in_dimensions = CurrentAccessMap.dim(isl::dim::in);
+
+  // Get domain from the current AM.
+  auto Domain = CurrentAccessMap.domain();
+
+  // Create a new AM from the domain.
+  auto NewAccessMap = isl::map::from_domain(Domain);
+
+  // Add dimensions to the new AM according to the current in_dim.
+  NewAccessMap = NewAccessMap.add_dims(isl::dim::out, in_dimensions);
+
+  // Create the string representing the name of the new SAI.
+  // One new SAI for each statement so that each write goes to a different
+  // memory cell.
+  auto CurrentStmtDomain = isl::give(MA->getStatement()->getDomain());
+  auto CurrentStmtName = CurrentStmtDomain.get_tuple_name();
+  std::string CurrentOutIdString =
+      MA->getScopArrayInfo()->getName() + "_" + CurrentStmtName + "_expanded";
+
+  // Create the size vector.
+  std::vector<unsigned> Sizes;
+  for (unsigned i = 0; i < in_dimensions; i++) {
+    assert(isDimBoundedByConstant(CurrentStmtDomain, i) &&
+           "Domain boundary are not constant.");
+    auto UpperBound = getConstant(CurrentStmtDomain.dim_max(i), true, false);
+    assert(!UpperBound.is_null() && UpperBound.is_pos() &&
+           !UpperBound.is_nan() &&
+           "The upper bound is not a positive integer.");
+    Sizes.push_back(UpperBound.get_num_si() + 1);
+  }
+
+  // Get the ElementType of the current SAI.
+  auto ElementType = MA->getLatestScopArrayInfo()->getElementType();
+
+  // Create (or get if already existing) the new expanded SAI.
+  auto ExpandedSAI =
+      S.createScopArrayInfo(ElementType, CurrentOutIdString.c_str(), Sizes);
+  ExpandedSAI->setIsOnHeap(true);
+
+  // Get the out Id of the expanded Array.
+  auto NewOutId = isl::give(ExpandedSAI->getBasePtrId());
+
+  // Set the out id of the new AM to the new SAI id.
+  NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, NewOutId);
+
+  // Add constraints linking each output dimension to the same input dimension.
+  auto SpaceMap = NewAccessMap.get_space();
+  auto ls = isl::local_space(SpaceMap);
+  for (unsigned dim = 0; dim < in_dimensions; dim++) {
+    auto Constraints = isl::constraint::alloc_equality(ls);
+    Constraints = Constraints.set_coefficient_si(isl::dim::out, dim, 1);
+    Constraints = Constraints.set_coefficient_si(isl::dim::in, dim, -1);
+    NewAccessMap = NewAccessMap.add_constraint(Constraints);
+  }
+
+  // Set the new access relation map.
+  MA->setNewAccessRelation(NewAccessMap.copy());
+
+  return ExpandedSAI;
+}
+
+void MaximalStaticExpander::emitRemark(Scop &S, std::string Msg) {
+  auto BB = S.getEnteringBlock();
+  auto Loc = BB->getTerminator()->getDebugLoc();
+  ORE->emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
+            << Msg);
+}
+
+bool MaximalStaticExpander::runOnScop(Scop &S) {
+
+  // Get the ORE from OptimizationRemarkEmitterWrapperPass.
+  ORE = &(getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
+  // FIXME: Placeholder remark; replace it with real expansion diagnostics.
+  emitRemark(S, "Test remarks");
+
+  // Get the RAW Dependences.
+  auto &DI = getAnalysis<DependenceInfo>();
+  auto &D = DI.getDependences(Dependences::AL_Statement);
+  auto Dependences = isl::give(D.getDependences(Dependences::TYPE_RAW));
+
+  // Snapshot the arrays in a deterministic order; expandWrite() adds new ones.
+  SmallVector<ScopArrayInfo *, 4> CurrentSAI;
+
+  for (auto &SAI : S.arrays()) {
+    CurrentSAI.push_back(SAI);
+  }
+
+  for (auto SAI : CurrentSAI) {
+    SmallPtrSet<MemoryAccess *, 4> AllWrites;
+    SmallPtrSet<MemoryAccess *, 4> AllReads;
+    if (!isExpandable(SAI, AllWrites, AllReads, S, Dependences))
+      continue;
+
+    assert(AllWrites.size() == 1);
+
+    auto TheWrite = *(AllWrites.begin());
+    ScopArrayInfo *ExpandedArray = expandWrite(S, TheWrite);
+
+    for (MemoryAccess *MA : AllReads)
+      expandRead(S, MA, Dependences, ExpandedArray);
+  }
+
+  return false;
+}
+
+void MaximalStaticExpander::printScop(raw_ostream &OS, Scop &S) const {
+  S.print(OS);
+}
+
+void MaximalStaticExpander::getAnalysisUsage(AnalysisUsage &AU) const {
+  ScopPass::getAnalysisUsage(AU);
+  AU.addRequired<DependenceInfo>();
+  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+}
+
+Pass *polly::createMaximalStaticExpansionPass() {
+  return new MaximalStaticExpander();
+}
+
+INITIALIZE_PASS_BEGIN(MaximalStaticExpander, "polly-opt-mse",
+                      "Polly - Maximal static expansion of SCoP", false, false);
+INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass);
+INITIALIZE_PASS_END(MaximalStaticExpander, "polly-opt-mse",
+                    "Polly - Maximal static expansion of SCoP", false, false)
Index: test/MaximalStaticExpansion/partial_access.ll
=================================================================== --- /dev/null +++ test/MaximalStaticExpansion/partial_access.ll @@ -0,0 +1,106 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s 2>&1 | FileCheck %s +; +; Verify that Polly detects problems and does not expand the array +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; for (int j = 2; j MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 6.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + store i32 2, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load i32, i32* %j, align 4 + %conv = sitofp i32 %2 to double + %3 = load double*, double** %B.addr, align 8 + %4 = load i32, i32* %j, align 4 + %sub = sub nsw i32 %4, 1 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom + store double %conv, double* %arrayidx, align 8 + br label %for.inc + +for.inc: ; preds = 
%for.body3 + %5 = load i32, i32* %j, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %6 = load double*, double** %B.addr, align 8 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 + %8 = load double, double* %arrayidx5, align 8 + %9 = load double*, double** %A.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 + store double %8, double* %arrayidx7, align 8 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %11 = load i32, i32* %i, align 4 + %inc9 = add nsw i32 %11, 1 + store i32 %inc9, i32* %i, align 4 + br label %for.cond + +for.end10: ; preds = %for.cond + %12 = load double, double* %tmp, align 8 + ret double %12 +} + Index: test/MaximalStaticExpansion/too_many_writes.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/too_many_writes.ll @@ -0,0 +1,113 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s 2>&1 | FileCheck %s +; +; Verify that Polly detects problems and does not expand the array +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; B[i] += 2; +; for (int j = 2; j MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** 
%A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 6.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc10, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end12 + +for.body: ; preds = %for.cond + %1 = load double*, double** %B.addr, align 8 + %2 = load i32, i32* %i, align 4 + %idxprom = sext i32 %2 to i64 + %arrayidx = getelementptr inbounds double, double* %1, i64 %idxprom + %3 = load double, double* %arrayidx, align 8 + %add = fadd double %3, 2.000000e+00 + store double %add, double* %arrayidx, align 8 + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %4 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %4, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %5 = load i32, i32* %j, align 4 + %conv = sitofp i32 %5 to double + %6 = load double*, double** %B.addr, align 8 + %7 = load i32, i32* %j, align 4 + %idxprom4 = sext i32 %7 to i64 + %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 + store double %conv, double* %arrayidx5, align 8 + br label %for.inc + +for.inc: ; preds = %for.body3 + %8 = load i32, i32* %j, align 4 + %inc = add nsw i32 %8, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %9 = load double*, double** %B.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 + %11 = load double, double* %arrayidx7, align 8 + %12 = load double*, double** %A.addr, align 8 + %13 = load i32, i32* %i, align 4 + %idxprom8 = sext i32 %13 to i64 + %arrayidx9 = getelementptr inbounds double, double* %12, i64 %idxprom8 + store double %11, double* %arrayidx9, align 8 + br label %for.inc10 + +for.inc10: ; preds = %for.end + %14 = load i32, i32* %i, align 4 + %inc11 
= add nsw i32 %14, 1 + store i32 %inc11, i32* %i, align 4 + br label %for.cond + +for.end12: ; preds = %for.cond + %15 = load double, double* %tmp, align 8 + ret double %15 +} Index: test/MaximalStaticExpansion/working_expansion.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/working_expansion.ll @@ -0,0 +1,101 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s | FileCheck %s +; +; Verify that the accesses are correctly expanded +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; for (int j = 0; j MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_body3_expanded[i0, i0] }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 6.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc8, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end10 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load i32, i32* %j, align 4 + %conv = sitofp i32 %2 to double + %3 = load double*, double** %B.addr, align 8 + %4 = load i32, i32* %j, align 4 + 
%idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom + store double %conv, double* %arrayidx, align 8 + br label %for.inc + +for.inc: ; preds = %for.body3 + %5 = load i32, i32* %j, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + %6 = load double*, double** %B.addr, align 8 + %7 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %arrayidx5 = getelementptr inbounds double, double* %6, i64 %idxprom4 + %8 = load double, double* %arrayidx5, align 8 + %9 = load double*, double** %A.addr, align 8 + %10 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %arrayidx7 = getelementptr inbounds double, double* %9, i64 %idxprom6 + store double %8, double* %arrayidx7, align 8 + br label %for.inc8 + +for.inc8: ; preds = %for.end + %11 = load i32, i32* %i, align 4 + %inc9 = add nsw i32 %11, 1 + store i32 %inc9, i32* %i, align 4 + br label %for.cond + +for.end10: ; preds = %for.cond + %12 = load double, double* %tmp, align 8 + ret double %12 +} +