Index: include/polly/LinkAllPasses.h =================================================================== --- include/polly/LinkAllPasses.h +++ include/polly/LinkAllPasses.h @@ -55,6 +55,7 @@ llvm::Pass *createIslScheduleOptimizerPass(); llvm::Pass *createFlattenSchedulePass(); llvm::Pass *createDeLICMPass(); +llvm::Pass *createMaximalStaticExpansionPass(); extern char &CodePreparationID; } // namespace polly @@ -88,6 +89,7 @@ polly::createPPCGCodeGenerationPass(); #endif polly::createIslScheduleOptimizerPass(); + polly::createMaximalStaticExpansionPass(); polly::createFlattenSchedulePass(); polly::createDeLICMPass(); polly::createDumpModulePass("", true); @@ -109,6 +111,7 @@ void initializePPCGCodeGenerationPass(llvm::PassRegistry &); #endif void initializeIslScheduleOptimizerPass(llvm::PassRegistry &); +void initializeMaximalStaticExpanderPass(llvm::PassRegistry &); void initializePollyCanonicalizePass(llvm::PassRegistry &); void initializeFlattenSchedulePass(llvm::PassRegistry &); void initializeDeLICMPass(llvm::PassRegistry &); Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -60,6 +60,7 @@ Transform/FlattenAlgo.cpp Transform/DeLICM.cpp Transform/Simplify.cpp + Transform/MaximalStaticExpansion.cpp ${POLLY_HEADER_FILES} ) set_target_properties(PollyCore PROPERTIES FOLDER "Polly") Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -140,6 +140,11 @@ cl::desc("Import the polyhedral description of the detected Scops"), cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt FullyIndexedStaticExpansion( + "polly-mse", + cl::desc("Fully expand the memory accesses of the detected Scops"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + static cl::opt ExportJScop( "polly-export", cl::desc("Export the polyhedral description of the detected Scops"), @@ -237,6 +242,7 @@ initializeDependenceInfoWrapperPassPass(Registry); initializeJSONExporterPass(Registry); initializeJSONImporterPass(Registry); + initializeMaximalStaticExpanderPass(Registry); initializeIslAstInfoWrapperPassPass(Registry); initializeIslScheduleOptimizerPass(Registry); initializePollyCanonicalizePass(Registry); @@ -310,6 +316,9 @@ if (ImportJScop) PM.add(polly::createJSONImporterPass()); + if (FullyIndexedStaticExpansion) + PM.add(polly::createMaximalStaticExpansionPass()); + if (DeadCodeElim) PM.add(polly::createDeadCodeElimPass()); Index: lib/Transform/MaximalStaticExpansion.cpp =================================================================== --- /dev/null +++ lib/Transform/MaximalStaticExpansion.cpp @@ -0,0 +1,166 @@ +//===- MaximalStaticExpansion.cpp - Expand the memory access -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass fully expand the memory accesses of a Scop to get rid of +// dependencies. +//===----------------------------------------------------------------------===// + +#include "polly/DependenceInfo.h" +#include "polly/LinkAllPasses.h" +#include "polly/Options.h" +#include "polly/ScopInfo.h" +#include "polly/Support/GICHelper.h" +#include "polly/Support/ISLOStream.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace polly; + +namespace { +class MaximalStaticExpander : public ScopPass { +public: + static char ID; + explicit MaximalStaticExpander() : ScopPass(ID) {} + + ~MaximalStaticExpander() {} + + /// Expand the scalar write of the SCoP @p S. + bool runOnScop(Scop &S) override; + + /// Print the new schedule for the SCoP @p S. + void printScop(raw_ostream &OS, Scop &S) const override; + + /// Register all analyses and transformation required. + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool expandWrite(Scop &S, MemoryAccess *MA); + + bool expandRead(Scop &S); +}; +} // namespace + +char MaximalStaticExpander::ID = 0; + +bool MaximalStaticExpander::expandRead(Scop &S) { return true; } + +bool MaximalStaticExpander::expandWrite(Scop &S, MemoryAccess *MA) { + + // Get the current AM + isl_map *CurrentAccessMap = MA->getAccessRelation(); + + // Get in_dimension of the current AM + unsigned in_dimensions = isl_map_dim(CurrentAccessMap, isl_dim_in); + + // Get domain from the current AM + auto Domain = isl_map_domain(CurrentAccessMap); + + // Create a new AM from the domain + isl_map *NewAccessMap = isl_map_from_domain(Domain); + + // Add dimensions to the new AM according to the current in_dim + // Fully indexed expansion + NewAccessMap = isl_map_add_dims(NewAccessMap, isl_dim_out, in_dimensions); + + // Create the string representing the name of the new SAI + auto CurrentOutId = isl_map_get_tuple_id(CurrentAccessMap, isl_dim_out); + std::string CurrentOutIdString = MA->getScopArrayInfo()->getName() + + std::to_string(in_dimensions) + "_expanded"; + + // Set the tuple id for the out dimension + NewAccessMap = isl_map_set_tuple_id(NewAccessMap, isl_dim_out, CurrentOutId); + + // Create the size vector + std::vector SCEVSizes; + auto ScopStmt = MA->getStatement(); + for (unsigned i = 0; i < ScopStmt->getNumIterators(); i++) { + auto Loop = ScopStmt->getLoopForDimension(i); + auto SCEV = S.getSE()->getMaxBackedgeTakenCount(Loop); + SCEVSizes.push_back(SCEV); + } + + // Get the ElementType of the current SAI + auto ElementType = MA->getOriginalScopArrayInfo()->getElementType(); + + // Create the new expanded SAI + auto ExpandedSAI = + S.getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes, + MemoryKind::Array, CurrentOutIdString.c_str()); + ExpandedSAI->setIsOnHeap(true); + + // Get the out Id of the expanded Array + isl_id *NewOutId = ExpandedSAI->getBasePtrId(); + + // Set the out id of the new AM to the new SAI out id + NewAccessMap = isl_map_set_tuple_id(NewAccessMap, isl_dim_out, NewOutId); + + // Copy the isl_ids for the parameter dimensions to the new AM + for (unsigned i = 0; i < isl_map_dim(CurrentAccessMap, isl_dim_param); i++) { + isl_id *Id = isl_map_get_dim_id(CurrentAccessMap, isl_dim_param, i); + NewAccessMap = isl_map_set_dim_id(NewAccessMap, isl_dim_param, i, Id); + } + + // Set the new access relation map + MA->setNewAccessRelation(NewAccessMap); + + return true; +} + +// For now, this pass is just expanding the write access of scalar. +// For instance, the folowing code : +// +// for(int i = 0; iisWrite()) { + CorrectWrite = expandWrite(S, MA); + if (!CorrectWrite) { + return false; + } + } else if (MA->isRead()) { + CorrectRead = expandRead(S); + if (!CorrectRead) { + return false; + } + } + } + } + return true; +} + +void MaximalStaticExpander::printScop(raw_ostream &OS, Scop &S) const { + S.dump(); +} + +void MaximalStaticExpander::getAnalysisUsage(AnalysisUsage &AU) const { + ScopPass::getAnalysisUsage(AU); +} + +Pass *polly::createMaximalStaticExpansionPass() { + return new MaximalStaticExpander(); +} + +INITIALIZE_PASS_BEGIN(MaximalStaticExpander, "polly-opt-mse", + "Polly - Maximal static expansion of SCoP", false, false); +INITIALIZE_PASS_DEPENDENCY(DependenceInfo); +INITIALIZE_PASS_END(MaximalStaticExpander, "polly-opt-mse", + "Polly - Maximal static expansion of SCoP", false, false) Index: test/MaximalStaticExpansion/mse___%for.cond1.preheader---%for.end8.jscop =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/mse___%for.cond1.preheader---%for.end8.jscop @@ -0,0 +1,79 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + }, + { + "name" : "MemRef_B", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "{ : }", + "name" : "%for.cond1.preheader---%for.end8", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_cond1_preheader[i0] -> MemRef_tmp_04__phi[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_cond1_preheader[i0] -> MemRef_tmp_11__phi[] }" + } + ], + "domain" : "{ Stmt_for_cond1_preheader[i0] : 0 <= i0 <= 1999 }", + "name" : "Stmt_for_cond1_preheader", + "schedule" : "{ Stmt_for_cond1_preheader[i0] -> [i0, 0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_tmp_11__phi[] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_tmp_11__phi[] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_B[1 + i0] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_mul_lcssa__phi[] }" + } + ], + "domain" : "{ Stmt_for_body3[i0, i1] : 0 <= i0 <= 1999 and 0 <= i1 <= 1999 }", + "name" : "Stmt_for_body3", + "schedule" : "{ Stmt_for_body3[i0, i1] -> [i0, 1, i1] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "{ Stmt_for_inc6[i0] -> MemRef_tmp_04__phi[] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt_for_inc6[i0] -> MemRef_mul_lcssa__phi[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_inc6[i0] -> MemRef_mul_lcssa[] }" + } + ], + "domain" : "{ Stmt_for_inc6[i0] : 0 <= i0 <= 1999 }", + "name" : "Stmt_for_inc6", + "schedule" : "{ Stmt_for_inc6[i0] -> [i0, 2, 0] }" + } + ] +} Index: test/MaximalStaticExpansion/write_accesses_expanded.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/write_accesses_expanded.ll @@ -0,0 +1,107 @@ +; RUN: opt -polly-canonicalize %loadPolly -analyze -polly-opt-mse < %s 2>&1 >/dev/null | FileCheck %s +; +; Verify that the write access are correctly expanded +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; double mse(double A[Ni], double B[Nj]) { +; int i; +; double tmp = 2; +; for (i = 0; i < Ni; i++) { +; for (int j = 0; j MemRef_tmp_11__phi1_expanded[o0] }; +; CHECK: new: { Stmt_for_body3[i0, i1] -> MemRef_tmp_11__phi2_expanded[o0, o1] }; +; CHECK: new: { Stmt_for_body3[i0, i1] -> MemRef_B2_expanded[o0, o1] }; +; CHECK: new: { Stmt_for_body3[i0, i1] -> MemRef_mul_lcssa__phi2_expanded[o0, o1] }; +; CHECK: new: { Stmt_for_inc6[i0] -> MemRef_tmp_04__phi1_expanded[o0] }; +; CHECK: new: { Stmt_for_inc6[i0] -> MemRef_mul_lcssa1_expanded[o0] }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define double @mse(double* %A, double* %B) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %i = alloca i32, align 4 + %tmp = alloca double, align 8 + %j = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store double 2.000000e+00, double* %tmp, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 2000 + br i1 %cmp, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + store i32 0, i32* %j, align 4 + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %1 = load i32, i32* %j, align 4 + %cmp2 = icmp slt i32 %1, 3000 + br i1 %cmp2, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %2 = load double*, double** %A.addr, align 8 + %3 = load i32, i32* %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds double, double* %2, i64 %idxprom + %4 = load double, double* %arrayidx, align 8 + %5 = load double, double* %tmp, align 8 + %mul = fmul double %4, %5 + store double %mul, double* %tmp, align 8 + %6 = load double, double* %tmp, align 8 + %7 = load double*, double** %B.addr, align 8 + %8 = load i32, i32* %i, align 4 + %add = add nsw i32 %8, 1 + %idxprom4 = sext i32 %add to i64 + %arrayidx5 = getelementptr inbounds double, double* %7, i64 %idxprom4 + store double %6, double* %arrayidx5, align 8 + br label %for.inc + +for.inc: ; preds = %for.body3 + %9 = load i32, i32* %j, align 4 + %inc = add nsw i32 %9, 1 + store i32 %inc, i32* %j, align 4 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %10 = load i32, i32* %i, align 4 + %inc7 = add nsw i32 %10, 1 + store i32 %inc7, i32* %i, align 4 + br label %for.cond + +for.end8: ; preds = %for.cond + %11 = load double, double* %tmp, align 8 + ret double %11 +}