Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -98,7 +98,7 @@ void create(__isl_take isl_ast_node *Node); /// Allocate memory for all new arrays created by Polly. - void allocateNewArrays(); + void allocateNewArrays(BBPair StartExitBlocks); /// Preload all memory loads that are invariant. bool preloadInvariantLoads(); Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -281,6 +281,9 @@ /// Return the base pointer. Value *getBasePtr() const { return BasePtr; } + // Set IsOnHeap to the value in parameter. + void setIsOnHeap(bool value) { IsOnHeap = value; } + /// For indirect accesses return the origin SAI of the BP, else null. const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; } @@ -355,6 +358,12 @@ /// Is this array info modeling an array? bool isArrayKind() const { return Kind == MemoryKind::Array; } + /// Is this array allocated on heap + /// + /// This property is only relevant if the array is allocated by Polly instead + /// of pre-existing. If false, it is allocated using alloca instead malloca. + bool isOnHeap() const { return IsOnHeap; } + /// Dump a readable representation to stderr. void dump() const; @@ -412,6 +421,9 @@ /// The isl id for the base pointer. isl_id *Id; + /// True if the newly allocated array is on heap. + bool IsOnHeap; + /// The sizes of each dimension as SCEV*. SmallVector DimensionSizes; @@ -2593,20 +2605,19 @@ /// @param ElementType The type of the elements stored in this array. /// @param Kind The kind of the array info object. /// @param BaseName The optional name of this memory reference. - const ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr, - Type *ElementType, - ArrayRef Sizes, - MemoryKind Kind, - const char *BaseName = nullptr); + ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, + ArrayRef Sizes, + MemoryKind Kind, + const char *BaseName = nullptr); /// Create an array and return the corresponding ScopArrayInfo object. /// /// @param ElementType The type of the elements stored in this array. /// @param BaseName The name of this memory reference. /// @param Sizes The sizes of dimensions. - const ScopArrayInfo *createScopArrayInfo(Type *ElementType, - const std::string &BaseName, - const std::vector &Sizes); + ScopArrayInfo *createScopArrayInfo(Type *ElementType, + const std::string &BaseName, + const std::vector &Sizes); /// Return the cached ScopArrayInfo object for @p BasePtr. /// Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -255,8 +255,8 @@ ArrayRef Sizes, MemoryKind Kind, const DataLayout &DL, Scop *S, const char *BaseName) - : BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S), - FAD(nullptr) { + : BasePtr(BasePtr), ElementType(ElementType), IsOnHeap(false), Kind(Kind), + DL(DL), S(*S), FAD(nullptr) { std::string BasePtrName = BaseName ? BaseName : getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(), @@ -4083,10 +4083,10 @@ } } -const ScopArrayInfo * -Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, - ArrayRef Sizes, MemoryKind Kind, - const char *BaseName) { +ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType, + ArrayRef Sizes, + MemoryKind Kind, + const char *BaseName) { assert((BasePtr || BaseName) && "BasePtr and BaseName can not be nullptr at the same time."); assert(!(BasePtr && BaseName) && "BaseName is redundant."); @@ -4107,9 +4107,9 @@ return SAI.get(); } -const ScopArrayInfo * -Scop::createScopArrayInfo(Type *ElementType, const std::string &BaseName, - const std::vector &Sizes) { +ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType, + const std::string &BaseName, + const std::vector &Sizes) { auto *DimSizeType = Type::getInt64Ty(getSE()->getContext()); std::vector SCEVSizes; Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -187,7 +187,7 @@ // All arrays must have their base pointers known before // ScopAnnotator::buildAliasScopes. - NodeBuilder.allocateNewArrays(); + NodeBuilder.allocateNewArrays(StartExitBlocks); Annotator.buildAliasScopes(S); if (PerfMonitoring) { @@ -232,7 +232,14 @@ Value *RTC = NodeBuilder.createRTC(AI.getRunCondition()); Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); - Builder.SetInsertPoint(&StartBlock->front()); + + // Explicitly set the insert point to the end of the block to avoid that a + // split at the builder's current + // insert position would move the malloc calls to the wrong BasicBlock. + // Ideally we would just split the block during allocation of the new + // arrays, but this would break the assumption that there are no blocks + // between polly.start and polly.exiting (at this point). + Builder.SetInsertPoint(StartBlock->getTerminator()); NodeBuilder.create(AstRoot); NodeBuilder.finalize(); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -1383,7 +1383,7 @@ return true; } -void IslNodeBuilder::allocateNewArrays() { +void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) { for (auto &SAI : S.arrays()) { if (SAI->getBasePtr()) continue; @@ -1393,6 +1393,9 @@ "created arrays that require memory allocation."); Type *NewArrayType = nullptr; + + // Get the size of the array = size(dim_1)*...*size(dim_n) + uint64_t ArraySizeInt = 1; for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) { auto *DimSize = SAI->getDimensionSize(i); unsigned UnsignedDimSize = static_cast(DimSize) @@ -1403,14 +1406,43 @@ NewArrayType = SAI->getElementType(); NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize); + ArraySizeInt *= UnsignedDimSize; } - auto InstIt = - Builder.GetInsertBlock()->getParent()->getEntryBlock().getTerminator(); - auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), - SAI->getName(), &*InstIt); - CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize); - SAI->setBasePtr(CreatedArray); + if (SAI->isOnHeap()) { + LLVMContext &Ctx = NewArrayType->getContext(); + + // Get the IntPtrTy from the Datalayout + auto IntPtrTy = DL.getIntPtrType(Ctx); + + // Get the size of the element type in bits + unsigned Size = SAI->getElementType()->getPrimitiveSizeInBits() / 8; + + // Insert the malloc call at polly.start + auto InstIt = std::get<0>(StartExitBlocks)->getTerminator(); + auto *CreatedArray = CallInst::CreateMalloc( + &*InstIt, IntPtrTy, SAI->getElementType(), + ConstantInt::get(Type::getInt64Ty(Ctx), Size), + ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr, + SAI->getName()); + + SAI->setBasePtr(CreatedArray); + + // Insert the free call at polly.exiting + CallInst::CreateFree(CreatedArray, + std::get<1>(StartExitBlocks)->getTerminator()); + + } else { + auto InstIt = Builder.GetInsertBlock() + ->getParent() + ->getEntryBlock() + .getTerminator(); + + auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(), + SAI->getName(), &*InstIt); + CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize); + SAI->setBasePtr(CreatedArray); + } } } Index: lib/Exchange/JSONExporter.cpp =================================================================== --- lib/Exchange/JSONExporter.cpp +++ lib/Exchange/JSONExporter.cpp @@ -694,17 +694,22 @@ } for (; ArrayIdx < Arrays.size(); ArrayIdx++) { - auto *ElementType = parseTextType(Arrays[ArrayIdx]["type"].asCString(), - S.getSE()->getContext()); + auto &Array = Arrays[ArrayIdx]; + auto *ElementType = + parseTextType(Array["type"].asCString(), S.getSE()->getContext()); if (!ElementType) { errs() << "Error while parsing element type for new array.\n"; return false; } std::vector DimSizes; - for (unsigned i = 0; i < Arrays[ArrayIdx]["sizes"].size(); i++) - DimSizes.push_back(std::stoi(Arrays[ArrayIdx]["sizes"][i].asCString())); - S.createScopArrayInfo(ElementType, Arrays[ArrayIdx]["name"].asCString(), - DimSizes); + for (unsigned i = 0; i < Array["sizes"].size(); i++) + DimSizes.push_back(std::stoi(Array["sizes"][i].asCString())); + auto NewSAI = + S.createScopArrayInfo(ElementType, Array["name"].asCString(), DimSizes); + + if (Array.isMember("allocation")) { + NewSAI->setIsOnHeap(Array["allocation"].asString() == "heap"); + } } return true; Index: test/Isl/CodeGen/MemAccess/create_arrays_heap.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/create_arrays_heap.ll @@ -0,0 +1,113 @@ +; RUN: opt %loadPolly -polly-scops -analyze -polly-import-jscop-dir=%S -polly-import-jscop -polly-import-jscop-postfix=transformed < %s | FileCheck %s +; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN +; +; #define Ni 1056 +; #define Nj 1056 +; #define Nk 1024 +; +; void create_arrays_heap(double beta, double A[Ni][Nk], double B[Ni][Nj]) { +; int i,j,k; +; +; for (i = 0; i < Ni; i++) { +; for (j = 0; j < Nj; j++) { +; for (k = 0; k < Nk; ++k) { +; B[i][j] = beta * A[i][k]; +; } +; } +; } +; } +; +; Check if the info from the JSON file has been analysed without errors. +; CHECK: Arrays { +; CHECK: double MemRef_A[*][1024]; // Element size 8 +; CHECK: double MemRef_beta; // Element size 8 +; CHECK: double MemRef_B[*][1056]; // Element size 8 +; CHECK: double D[270336]; // Element size 8 +; CHECK: double E[270336][200000]; // Element size 8 +; CHECK: i64 F[270336]; // Element size 8 +; +; Check if there are the 3 expected malloc calls with the right parameters at polly.start. +; %D : size(D) = product_all_dimensions*sizeof(type) = 270336*8 = 2162688 cast to double* +; %E : size(E) = 270336*200000*8 = 432537600000 cast to double* +; %F : size(F) = 270336*8 = 2162688 cast to i64* +; CODEGEN: polly.start: +; CODEGEN: %malloccall = tail call i8* @malloc(i64 2162688) +; CODEGEN: %D = bitcast i8* %malloccall to double* +; CODEGEN: %malloccall1 = tail call i8* @malloc(i64 432537600000) +; CODEGEN: %E = bitcast i8* %malloccall1 to double* +; CODEGEN: %malloccall2 = tail call i8* @malloc(i64 2162688) +; CODEGEN: %F = bitcast i8* %malloccall2 to i64* +; +; Check if there are the 3 expected malloc calls with the right parameters at polly.exiting. +; Cast to i8* before freeing because malloc give us a i8 and free is waiting for a i8* +; CODEGEN: polly.exiting: +; CODEGEN: %12 = bitcast double* %D to i8* +; CODEGEN: tail call void @free(i8* %12) +; CODEGEN: %13 = bitcast double* %E to i8* +; CODEGEN: tail call void @free(i8* %13) +; CODEGEN: %14 = bitcast i64* %F to i8* +; CODEGEN: tail call void @free(i8* %14) +; +; Check if the new access for array E is present. +; CODEGEN: %polly.access.mul.E = mul nsw i64 %polly.indvar, 200000 +; CODEGEN: %polly.access.add.E = add nsw i64 %polly.access.mul.E, % +; CODEGEN: %polly.access.E = getelementptr double, double* %E, i64 %polly.access.add.E +; +; ModuleID = 'create_arrays_heap.ll' +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @create_arrays_heap(double %beta, [1024 x double]* nocapture readonly %A, [1056 x double]* nocapture %B) local_unnamed_addr { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc16, %entry + %indvars.iv35 = phi i64 [ 0, %entry ], [ %indvars.iv.next36, %for.inc16 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc13, %for.cond1.preheader + %indvars.iv32 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next33, %for.inc13 ] + %arrayidx12 = getelementptr inbounds [1056 x double], [1056 x double]* %B, i64 %indvars.iv35, i64 %indvars.iv32 + br label %for.body6 + +for.body6: ; preds = %for.body6, %for.cond4.preheader + %indvars.iv = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next.3, %for.body6 ] + %arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv + %0 = load double, double* %arrayidx8, align 8 + %mul = fmul double %0, %beta + store double %mul, double* %arrayidx12, align 8 + %indvars.iv.next = or i64 %indvars.iv, 1 + %arrayidx8.1 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next + %1 = load double, double* %arrayidx8.1, align 8 + %mul.1 = fmul double %1, %beta + store double %mul.1, double* %arrayidx12, align 8 + %indvars.iv.next.1 = or i64 %indvars.iv, 2 + %arrayidx8.2 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next.1 + %2 = load double, double* %arrayidx8.2, align 8 + %mul.2 = fmul double %2, %beta + store double %mul.2, double* %arrayidx12, align 8 + %indvars.iv.next.2 = or i64 %indvars.iv, 3 + %arrayidx8.3 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next.2 + %3 = load double, double* %arrayidx8.3, align 8 + %mul.3 = fmul double %3, %beta + store double %mul.3, double* %arrayidx12, align 8 + %indvars.iv.next.3 = add nsw i64 %indvars.iv, 4 + %exitcond.3 = icmp eq i64 %indvars.iv.next.3, 1024 + br i1 %exitcond.3, label %for.inc13, label %for.body6 + +for.inc13: ; preds = %for.body6 + %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1 + %exitcond34 = icmp eq i64 %indvars.iv.next33, 1056 + br i1 %exitcond34, label %for.inc16, label %for.cond4.preheader + +for.inc16: ; preds = %for.inc13 + %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1 + %exitcond37 = icmp eq i64 %indvars.iv.next36, 1056 + br i1 %exitcond37, label %for.end18, label %for.cond1.preheader + +for.end18: ; preds = %for.inc16 + ret void +} + Index: test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop @@ -0,0 +1,62 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*", "1024" ], + "type" : "double" + }, + { + "name" : "MemRef_B", + "sizes" : [ "*", "1056" ], + "type" : "double" + } + ], + "context" : "{ : }", + "location" : "pure_c_main.c:11-16", + "name" : "%for.cond1.preheader---%for.end18", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 4i2] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_beta[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 1 + 4i2] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 2 + 4i2] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 3 + 4i2] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + } + ], + "domain" : "{ Stmt2[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 255 }", + "name" : "Stmt2", + "schedule" : "{ Stmt2[i0, i1, i2] -> [i0, i1, i2] }" + } + ] +} Index: test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop.transformed =================================================================== --- /dev/null +++ test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop.transformed @@ -0,0 +1,80 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*", "1024" ], + "type" : "double" + }, + { + "name" : "MemRef_B", + "sizes" : [ "*", "1056" ], + "type" : "double" + }, + { + "name" : "D", + "sizes" : [ "270336" ], + "type" : "double", + "allocation" : "heap" + }, + { + "name" : "E", + "sizes" : [ "270336", "200000" ], + "type" : "double", + "allocation" : "heap" + }, + { + "name" : "F", + "sizes" : [ "270336" ], + "type" : "i64", + "allocation" : "heap" + } + ], + "context" : "{ : }", + "location" : "pure_c_main.c:11-16", + "name" : "%for.cond1.preheader---%for.end18", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 4i2] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_beta[] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 1 + 4i2] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 2 + 4i2] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 3 + 4i2] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }" + } + ], + "domain" : "{ Stmt2[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 255 }", + "name" : "Stmt2", + "schedule" : "{ Stmt2[i0, i1, i2] -> [i0, i1, i2] }" + } + ] +}