Index: lib/CodeGen/IslNodeBuilder.cpp
===================================================================
--- lib/CodeGen/IslNodeBuilder.cpp
+++ lib/CodeGen/IslNodeBuilder.cpp
@@ -78,6 +78,11 @@
     cl::desc("The size of the first level cache line size specified in bytes."),
     cl::Hidden, cl::init(64), cl::ZeroOrMore, cl::cat(PollyCategory));
 
+static cl::opt<bool> PollyAllocateArraysHeap(
+    "polly-arrays-heap",
+    cl::desc("Allocate arrays on the heap and not on the stack."), cl::Hidden,
+    cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+
 __isl_give isl_ast_expr *
 IslNodeBuilder::getUpperBound(__isl_keep isl_ast_node *For,
                               ICmpInst::Predicate &Predicate) {
@@ -1393,6 +1398,9 @@
            "created arrays that require memory allocation.");
 
     Type *NewArrayType = nullptr;
+
+    // Number of elements in the array = size(dim_1) * ... * size(dim_n).
+    uint64_t ArraySizeInt = 1;
     for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) {
       auto *DimSize = SAI->getDimensionSize(i);
       unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
@@ -1403,14 +1411,35 @@
         NewArrayType = SAI->getElementType();
 
       NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
+      ArraySizeInt *= UnsignedDimSize;
     }
 
     auto InstIt =
         Builder.GetInsertBlock()->getParent()->getEntryBlock().getTerminator();
-    auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
-                                        SAI->getName(), &*InstIt);
-    CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
-    SAI->setBasePtr(CreatedArray);
+
+    if (PollyAllocateArraysHeap) {
+      LLVMContext &Ctx = NewArrayType->getContext();
+
+      // Use the target's pointer-sized integer type for the malloc size.
+      auto *IntPtrTy = DL.getIntPtrType(Ctx);
+
+      // Get the allocation size of the element type in bytes.
+      uint64_t Size = DL.getTypeAllocSize(SAI->getElementType());
+
+      // Insert the malloc call before the entry block terminator.
+      // FIXME: This patch emits no matching free(), so the array is never
+      // released.
+      auto *CreatedArray = CallInst::CreateMalloc(
+          &*InstIt, IntPtrTy, SAI->getElementType(),
+          ConstantInt::get(IntPtrTy, Size),
+          ConstantInt::get(IntPtrTy, ArraySizeInt), nullptr, SAI->getName());
+      SAI->setBasePtr(CreatedArray);
+    } else {
+      auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
+                                          SAI->getName(), &*InstIt);
+      CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
+      SAI->setBasePtr(CreatedArray);
+    }
   }
 }
 
Index: test/Isl/CodeGen/MemAccess/create_arrays_heap.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/MemAccess/create_arrays_heap.ll
@@ -0,0 +1,127 @@
+; RUN: opt %loadPolly -mem2reg -polly-scops -analyze -polly-arrays-heap -polly-import-jscop-dir=%S -polly-import-jscop -polly-import-jscop-postfix=transformed < %s 2>&1 | FileCheck %s
+; RUN: opt %loadPolly -mem2reg -polly-import-jscop-dir=%S -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-codegen -polly-arrays-heap -S < %s 2>&1 | FileCheck %s --check-prefix=CODEGEN
+;
+; #define Ni 1056
+; #define Nj 1056
+; #define Nk 1024
+;
+; void create_arrays_heap(double beta, double A[Ni][Nk], double B[Ni][Nj]) {
+;   int i,j,k;
+;
+;   for (i = 0; i < Ni; i++) {
+;     for (j = 0; j < Nj; j++) {
+;       for (k = 0; k < Nk; ++k) {
+;         B[i][j] = beta * A[i][k];
+;       }
+;     }
+;   }
+; }
+;
+; Check if the info from the JSON file has been analysed without errors.
+; CHECK: Arrays {
+; CHECK: double MemRef_A[*][1024]; // Element size 8
+; CHECK: double MemRef_beta; // Element size 8
+; CHECK: double MemRef_B[*][1056]; // Element size 8
+; CHECK: double D[270336]; // Element size 8
+; CHECK: double E[270336][200000]; // Element size 8
+; CHECK: i64 F[270336]; // Element size 8
+;
+; Check if there are the 3 expected malloc calls with the right parameters.
+; %D : size(D) = product_all_dimensions*sizeof(type) = 270336*8 = 2162688 cast to double*
+; %E : size(E) = 270336*200000*8 = 432537600000 cast to double*
+; %F : size(F) = 270336*8 = 2162688 cast to i64*
+; CODEGEN: %malloccall = tail call i8* @malloc(i64 2162688)
+; CODEGEN: %D = bitcast i8* %malloccall to double*
+; CODEGEN: %malloccall1 = tail call i8* @malloc(i64 432537600000)
+; CODEGEN: %E = bitcast i8* %malloccall1 to double*
+; CODEGEN: %malloccall2 = tail call i8* @malloc(i64 2162688)
+; CODEGEN: %F = bitcast i8* %malloccall2 to i64*
+;
+; Check that the new access to array E is present and linearized with the inner dimension size 200000.
+; CODEGEN: %polly.access.mul.E = mul nsw i64 %polly.indvar, 200000
+; CODEGEN: %polly.access.add.E = add nsw i64 %polly.access.mul.E, %{{[0-9]+}}
+; CODEGEN: %polly.access.E = getelementptr double, double* %E, i64 %polly.access.add.E
+;
+;
+; ModuleID = 'create_arrays_heap.ll'
+source_filename = "pure_c_main.c"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @create_arrays_heap(double %beta, [1024 x double]* nocapture readonly %A, [1056 x double]* nocapture %B) local_unnamed_addr #0 {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc16, %entry
+  %indvars.iv35 = phi i64 [ 0, %entry ], [ %indvars.iv.next36, %for.inc16 ]
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.inc13, %for.cond1.preheader
+  %indvars.iv32 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next33, %for.inc13 ]
+  %arrayidx12 = getelementptr inbounds [1056 x double], [1056 x double]* %B, i64 %indvars.iv35, i64 %indvars.iv32
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6, %for.cond4.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next.3, %for.body6 ]
+  %arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv
+  %0 = load double, double* %arrayidx8, align 8, !tbaa !4
+  %mul = fmul double %0, %beta
+  store double %mul, double* %arrayidx12, align 8, !tbaa !4
+  %indvars.iv.next = or i64 %indvars.iv, 1
+  %arrayidx8.1 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next
+  %1 = load double, double* %arrayidx8.1, align 8, !tbaa !4
+  %mul.1 = fmul double %1, %beta
+  store double %mul.1, double* %arrayidx12, align 8, !tbaa !4
+  %indvars.iv.next.1 = or i64 %indvars.iv, 2
+  %arrayidx8.2 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next.1
+  %2 = load double, double* %arrayidx8.2, align 8, !tbaa !4
+  %mul.2 = fmul double %2, %beta
+  store double %mul.2, double* %arrayidx12, align 8, !tbaa !4
+  %indvars.iv.next.2 = or i64 %indvars.iv, 3
+  %arrayidx8.3 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next.2
+  %3 = load double, double* %arrayidx8.3, align 8, !tbaa !4
+  %mul.3 = fmul double %3, %beta
+  store double %mul.3, double* %arrayidx12, align 8, !tbaa !4
+  %indvars.iv.next.3 = add nsw i64 %indvars.iv, 4
+  %exitcond.3 = icmp eq i64 %indvars.iv.next.3, 1024
+  br i1 %exitcond.3, label %for.inc13, label %for.body6
+
+for.inc13:                                        ; preds = %for.body6
+  %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
+  %exitcond34 = icmp eq i64 %indvars.iv.next33, 1056
+  br i1 %exitcond34, label %for.inc16, label %for.cond4.preheader
+
+for.inc16:                                        ; preds = %for.inc13
+  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
+  %exitcond37 = icmp eq i64 %indvars.iv.next36, 1056
+  br i1 %exitcond37, label %for.end18, label %for.cond1.preheader
+
+for.end18:                                        ; preds = %for.inc16
+  ret void
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @main() local_unnamed_addr #1 {
+entry:
+  ret i32 0
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0, !1, !2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{i32 1, !"wchar_size", i32 4}
+!3 = !{!"clang version 5.0.0 (http://llvm.org/git/clang.git e2d94330ab2852f4528dd84764b932a8179b25eb) (http://llvm.org/git/llvm.git 21ff45022b53e9b0e14273a167c1079ce1d89735)"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"double", !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C/C++ TBAA"}
Index: test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop
@@ -0,0 +1,62 @@
+{
+   "arrays" : [
+      {
+         "name" : "MemRef_A",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_B",
+         "sizes" : [ "*", "1056" ],
+         "type" : "double"
+      }
+   ],
+   "context" : "{ : }",
+   "location" : "pure_c_main.c:11-16",
+   "name" : "%for.cond1.preheader---%for.end18",
+   "statements" : [
+      {
+         "accesses" : [
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 4i2] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_beta[] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 1 + 4i2] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 2 + 4i2] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 3 + 4i2] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            }
+         ],
+         "domain" : "{ Stmt2[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 255 }",
+         "name" : "Stmt2",
+         "schedule" : "{ Stmt2[i0, i1, i2] -> [i0, i1, i2] }"
+      }
+   ]
+}
Index: test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop.transformed
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/MemAccess/create_arrays_heap___%for.cond1.preheader---%for.end18.jscop.transformed
@@ -0,0 +1,77 @@
+{
+   "arrays" : [
+      {
+         "name" : "MemRef_A",
+         "sizes" : [ "*", "1024" ],
+         "type" : "double"
+      },
+      {
+         "name" : "MemRef_B",
+         "sizes" : [ "*", "1056" ],
+         "type" : "double"
+      },
+      {
+         "name" : "D",
+         "sizes" : [ "270336" ],
+         "type" : "double"
+      },
+      {
+         "name" : "E",
+         "sizes" : [ "270336", "200000" ],
+         "type" : "double"
+      },
+      {
+         "name" : "F",
+         "sizes" : [ "270336" ],
+         "type" : "i64"
+      }
+   ],
+   "context" : "{ : }",
+   "location" : "pure_c_main.c:11-16",
+   "name" : "%for.cond1.preheader---%for.end18",
+   "statements" : [
+      {
+         "accesses" : [
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 4i2] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_beta[] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 1 + 4i2] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 2 + 4i2] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            },
+            {
+               "kind" : "read",
+               "relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 3 + 4i2] }"
+            },
+            {
+               "kind" : "write",
+               "relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
+            }
+         ],
+         "domain" : "{ Stmt2[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 255 }",
+         "name" : "Stmt2",
+         "schedule" : "{ Stmt2[i0, i1, i2] -> [i0, i1, i2] }"
+      }
+   ]
+}