Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3471,8 +3471,7 @@
     return false;
   const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
   // Use the default allocation.
-  return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
-            AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
+  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
            !AA->getAllocator());
 }
 
@@ -12240,6 +12239,26 @@
   return CGF.GetAddrOfLocalVar(NativeParam);
 }
 
+/// Return allocator value from expression, or return a null allocator (default
+/// when no allocator specified).
+static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
+                                    const Expr *Allocator) {
+  llvm::Value *AllocVal;
+  if (Allocator) {
+    AllocVal = CGF.EmitScalarExpr(Allocator);
+    // According to the standard, the original allocator type is an enum
+    // (integer). Convert to pointer type, if required.
+    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
+                                        CGF.getContext().VoidPtrTy,
+                                        Allocator->getExprLoc());
+  } else {
+    // If no allocator specified, it defaults to the null allocator.
+    AllocVal = llvm::Constant::getNullValue(
+        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
+  }
+  return AllocVal;
+}
+
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
   if (!VD)
@@ -12276,20 +12295,24 @@
     }
     llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
     const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
-    assert(AA->getAllocator() &&
-           "Expected allocator expression for non-default allocator.");
-    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
-    // According to the standard, the original allocator type is a enum
-    // (integer). Convert to pointer type, if required.
-    Allocator = CGF.EmitScalarConversion(
-        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
-        AA->getAllocator()->getExprLoc());
-    llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
-    llvm::Value *Addr =
-        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                                CGM.getModule(), OMPRTL___kmpc_alloc),
-                            Args, getName({CVD->getName(), ".void.addr"}));
+    const Expr *Allocator = AA->getAllocator();
+    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
+    llvm::Value *Alignment =
+        AA->getAlignment()
+            ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
+                                        CGM.SizeTy, /*isSigned=*/false)
+            : nullptr;
+    SmallVector<llvm::Value *, 4> Args;
+    Args.push_back(ThreadID);
+    if (Alignment)
+      Args.push_back(Alignment);
+    Args.push_back(Size);
+    Args.push_back(AllocVal);
+    llvm::omp::RuntimeFunction FnID =
+        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
+    llvm::Value *Addr = CGF.EmitRuntimeCall(
+        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
+        getName({CVD->getName(), ".void.addr"}));
     llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
         CGM.getModule(), OMPRTL___kmpc_free);
     QualType Ty = CGM.getContext().getPointerType(CVD->getType());
@@ -12303,14 +12326,14 @@
       llvm::FunctionCallee RTLFn;
       SourceLocation::UIntTy LocEncoding;
       Address Addr;
-      const Expr *Allocator;
+      const Expr *AllocExpr;
 
     public:
       OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                            SourceLocation::UIntTy LocEncoding, Address Addr,
-                           const Expr *Allocator)
+                           const Expr *AllocExpr)
           : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
-            Allocator(Allocator) {}
+            AllocExpr(AllocExpr) {}
       void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
         if (!CGF.HaveInsertPoint())
           return;
@@ -12319,14 +12342,8 @@
             CGF, SourceLocation::getFromRawEncoding(LocEncoding));
         Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
             Addr.getPointer(), CGF.VoidPtrTy);
-        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
-        // According to the standard, the original allocator type is a enum
-        // (integer). Convert to pointer type, if required.
-        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
-                                            CGF.getContext().VoidPtrTy,
-                                            Allocator->getExprLoc());
+        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
         Args[2] = AllocVal;
-
         CGF.EmitRuntimeCall(RTLFn, Args);
       }
     };
@@ -12334,7 +12351,7 @@
         UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
     CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
         NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
-        VDAddr, AA->getAllocator());
+        VDAddr, Allocator);
     if (UntiedRealAddr.isValid())
       if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
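
In short, the codegen above now selects __kmpc_aligned_alloc whenever the
OMPAllocateDeclAttr carries an align clause value (Alignment is non-null) and
keeps emitting __kmpc_alloc otherwise; the alignment travels as an extra
size_t argument between the thread id and the allocation size. As orientation,
one case taken from the new test below (x86_64; %tid is an illustrative
stand-in for the [[TMP0]] thread-id value in the checks):

    // One case from align_clause_codegen.cpp below:
    int foo4[40];
    #pragma omp allocate(foo4) align(16) allocator(omp_high_bw_mem_alloc)
    // lowers to a call of the form:
    //   %.foo4..void.addr = call i8* @__kmpc_aligned_alloc(
    //       i32 %tid, i64 16, i64 160, i8* inttoptr (i64 4 to i8*))
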
Index: clang/test/OpenMP/align_clause_codegen.cpp
===================================================================
--- /dev/null
+++ clang/test/OpenMP/align_clause_codegen.cpp
@@ -0,0 +1,303 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
+// RUN: %clang_cc1 -emit-llvm -o - -fopenmp \
+// RUN:   -triple i386-unknown-unknown -fopenmp-version=51 %s | \
+// RUN:   FileCheck %s --check-prefix=CHECK-32
+// RUN: %clang_cc1 -emit-llvm -o - -fopenmp \
+// RUN:   -triple x86_64-unknown-linux-gnu -fopenmp-version=51 %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp \
+// RUN:   -triple x86_64-unknown-linux-gnu -fopenmp-version=51 \
+// RUN:   -emit-pch %s -o %t
+// RUN: %clang_cc1 -fopenmp \
+// RUN:   -triple x86_64-unknown-linux-gnu -fopenmp-version=51 \
+// RUN:   -include-pch %t -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+typedef enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
+} omp_allocator_handle_t;
+
+int main() {
+  int foo0[5];
+  int foo1[10];
+  int foo2[20];
+  int foo3[30];
+  int foo4[40];
+  int foo5[50];
+  int foo6[60];
+  int foo7[70];
+  int foo8[80];
+  omp_allocator_handle_t MyAlloc = omp_large_cap_mem_alloc;
+
+#pragma omp allocate(foo0) align(1)
+#pragma omp allocate(foo1) allocator(omp_pteam_mem_alloc) align(2)
+#pragma omp allocate(foo2) align(4) allocator(omp_cgroup_mem_alloc)
+#pragma omp allocate(foo3) align(8) allocator(omp_low_lat_mem_alloc)
+#pragma omp allocate(foo4) align(16) allocator(omp_high_bw_mem_alloc)
+#pragma omp allocate(foo5) align(32) allocator(omp_const_mem_alloc)
+#pragma omp allocate(foo6) align(64) allocator(omp_large_cap_mem_alloc)
+#pragma omp allocate(foo7) align(32) allocator(omp_thread_mem_alloc)
+#pragma omp allocate(foo8) align(16) allocator(omp_null_allocator)
+  {
+    double foo9[80];
+    double foo10[90];
+#pragma omp allocate(foo9) align(8) allocator(omp_thread_mem_alloc)
+#pragma omp allocate(foo10) align(128)
+  }
+  {
+    int bar1;
+    int bar2[10];
+    int bar3[20];
+    int *bar4;
+    float bar5;
+    double bar6[30];
+#pragma omp allocate(bar1, bar2, bar3) align(2) allocator(MyAlloc)
+#pragma omp allocate(bar4, bar5, bar6) align(16)
+  }
+}
+
+// Verify align clause in template with non-type template parameter.
+template <typename T, unsigned size, unsigned align>
+T run() {
+  T foo[size];
+#pragma omp allocate(foo) align(align) allocator(omp_cgroup_mem_alloc)
+  return foo[0];
+}
+
+int template_test() {
+  double result;
+  result = run<double, 1000, 16>();
+  return 0;
+}
+#endif
+
+// CHECK-32-LABEL: define {{[^@]+}}@main
+// CHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[MYALLOC:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-32-NEXT: [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 1, i32 20, i8* null)
+// CHECK-32-NEXT: [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
+// CHECK-32-NEXT: [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT: [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
+// CHECK-32-NEXT: [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
+// CHECK-32-NEXT: [[DOTFOO3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 8, i32 120, i8* inttoptr (i32 5 to i8*))
+// CHECK-32-NEXT: [[DOTFOO3__ADDR:%.*]] = bitcast i8* [[DOTFOO3__VOID_ADDR]] to [30 x i32]*
+// CHECK-32-NEXT: [[DOTFOO4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 160, i8* inttoptr (i32 4 to i8*))
+// CHECK-32-NEXT: [[DOTFOO4__ADDR:%.*]] = bitcast i8* [[DOTFOO4__VOID_ADDR]] to [40 x i32]*
+// CHECK-32-NEXT: [[DOTFOO5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 32, i32 200, i8* inttoptr (i32 3 to i8*))
+// CHECK-32-NEXT: [[DOTFOO5__ADDR:%.*]] = bitcast i8* [[DOTFOO5__VOID_ADDR]] to [50 x i32]*
+// CHECK-32-NEXT: [[DOTFOO6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 64, i32 240, i8* inttoptr (i32 2 to i8*))
+// CHECK-32-NEXT: [[DOTFOO6__ADDR:%.*]] = bitcast i8* [[DOTFOO6__VOID_ADDR]] to [60 x i32]*
+// CHECK-32-NEXT: [[DOTFOO7__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 32, i32 280, i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[DOTFOO7__ADDR:%.*]] = bitcast i8* [[DOTFOO7__VOID_ADDR]] to [70 x i32]*
+// CHECK-32-NEXT: [[DOTFOO8__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 320, i8* null)
+// CHECK-32-NEXT: [[DOTFOO8__ADDR:%.*]] = bitcast i8* [[DOTFOO8__VOID_ADDR]] to [80 x i32]*
+// CHECK-32-NEXT: store i32 2, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[DOTFOO9__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 8, i32 640, i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[DOTFOO9__ADDR:%.*]] = bitcast i8* [[DOTFOO9__VOID_ADDR]] to [80 x double]*
+// CHECK-32-NEXT: [[DOTFOO10__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 128, i32 720, i8* null)
+// CHECK-32-NEXT: [[DOTFOO10__ADDR:%.*]] = bitcast i8* [[DOTFOO10__VOID_ADDR]] to [90 x double]*
+// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [90 x double]* [[DOTFOO10__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [80 x double]* [[DOTFOO9__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
+// CHECK-32-NEXT: [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 4, i8* [[CONV]])
+// CHECK-32-NEXT: [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
+// CHECK-32-NEXT: [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* [[CONV1]])
+// CHECK-32-NEXT: [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
+// CHECK-32-NEXT: [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 80, i8* [[CONV2]])
+// CHECK-32-NEXT: [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
+// CHECK-32-NEXT: [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
+// CHECK-32-NEXT: [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
+// CHECK-32-NEXT: [[DOTBAR5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
+// CHECK-32-NEXT: [[DOTBAR5__ADDR:%.*]] = bitcast i8* [[DOTBAR5__VOID_ADDR]] to float*
+// CHECK-32-NEXT: [[DOTBAR6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 240, i8* null)
+// CHECK-32-NEXT: [[DOTBAR6__ADDR:%.*]] = bitcast i8* [[DOTBAR6__VOID_ADDR]] to [30 x double]*
+// CHECK-32-NEXT: [[TMP6:%.*]] = bitcast [30 x double]* [[DOTBAR6__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP6]], i8* null)
+// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast float* [[DOTBAR5__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP7]], i8* null)
+// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i32** [[DOTBAR4__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP8]], i8* null)
+// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [20 x i32]* [[DOTBAR3__ADDR]] to i8*
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV3:%.*]] = inttoptr i32 [[TMP10]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP9]], i8* [[CONV3]])
+// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[DOTBAR2__ADDR]] to i8*
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV4:%.*]] = inttoptr i32 [[TMP12]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP11]], i8* [[CONV4]])
+// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast i32* [[DOTBAR1__ADDR]] to i8*
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, i32* [[MYALLOC]], align 4
+// CHECK-32-NEXT: [[CONV5:%.*]] = inttoptr i32 [[TMP14]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP13]], i8* [[CONV5]])
+// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast [80 x i32]* [[DOTFOO8__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP15]], i8* null)
+// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast [70 x i32]* [[DOTFOO7__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP16]], i8* inttoptr (i32 8 to i8*))
+// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast [60 x i32]* [[DOTFOO6__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP17]], i8* inttoptr (i32 2 to i8*))
+// CHECK-32-NEXT: [[TMP18:%.*]] = bitcast [50 x i32]* [[DOTFOO5__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP18]], i8* inttoptr (i32 3 to i8*))
+// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast [40 x i32]* [[DOTFOO4__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP19]], i8* inttoptr (i32 4 to i8*))
+// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast [30 x i32]* [[DOTFOO3__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP20]], i8* inttoptr (i32 5 to i8*))
+// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast [20 x i32]* [[DOTFOO2__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP21]], i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast [10 x i32]* [[DOTFOO1__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP22]], i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast [5 x i32]* [[DOTFOO0__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP23]], i8* null)
+// CHECK-32-NEXT: ret i32 0
+//
+//
+// CHECK-32-LABEL: define {{[^@]+}}@_Z13template_testv
+// CHECK-32-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[RESULT:%.*]] = alloca double, align 8
+// CHECK-32-NEXT: [[CALL:%.*]] = call double @_Z3runIdLj1000ELj16EET_v()
+// CHECK-32-NEXT: store double [[CALL]], double* [[RESULT]], align 8
+// CHECK-32-NEXT: ret i32 0
+//
+//
+// CHECK-32-LABEL: define {{[^@]+}}@_Z3runIdLj1000ELj16EET_v
+// CHECK-32-SAME: () #[[ATTR2]] comdat {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-32-NEXT: [[DOTFOO__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 8000, i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: [[DOTFOO__ADDR:%.*]] = bitcast i8* [[DOTFOO__VOID_ADDR]] to [1000 x double]*
+// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], [1000 x double]* [[DOTFOO__ADDR]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 8
+// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [1000 x double]* [[DOTFOO__ADDR]] to i8*
+// CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 6 to i8*))
+// CHECK-32-NEXT: ret double [[TMP1]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[MYALLOC:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 1, i64 32, i8* null)
+// CHECK-NEXT: [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
+// CHECK-NEXT: [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT: [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
+// CHECK-NEXT: [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
+// CHECK-NEXT: [[DOTFOO3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 8, i64 128, i8* inttoptr (i64 5 to i8*))
+// CHECK-NEXT: [[DOTFOO3__ADDR:%.*]] = bitcast i8* [[DOTFOO3__VOID_ADDR]] to [30 x i32]*
+// CHECK-NEXT: [[DOTFOO4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 160, i8* inttoptr (i64 4 to i8*))
+// CHECK-NEXT: [[DOTFOO4__ADDR:%.*]] = bitcast i8* [[DOTFOO4__VOID_ADDR]] to [40 x i32]*
+// CHECK-NEXT: [[DOTFOO5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 32, i64 208, i8* inttoptr (i64 3 to i8*))
+// CHECK-NEXT: [[DOTFOO5__ADDR:%.*]] = bitcast i8* [[DOTFOO5__VOID_ADDR]] to [50 x i32]*
+// CHECK-NEXT: [[DOTFOO6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 64, i64 240, i8* inttoptr (i64 2 to i8*))
+// CHECK-NEXT: [[DOTFOO6__ADDR:%.*]] = bitcast i8* [[DOTFOO6__VOID_ADDR]] to [60 x i32]*
+// CHECK-NEXT: [[DOTFOO7__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 32, i64 288, i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[DOTFOO7__ADDR:%.*]] = bitcast i8* [[DOTFOO7__VOID_ADDR]] to [70 x i32]*
+// CHECK-NEXT: [[DOTFOO8__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 320, i8* null)
+// CHECK-NEXT: [[DOTFOO8__ADDR:%.*]] = bitcast i8* [[DOTFOO8__VOID_ADDR]] to [80 x i32]*
+// CHECK-NEXT: store i64 2, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[DOTFOO9__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 8, i64 640, i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[DOTFOO9__ADDR:%.*]] = bitcast i8* [[DOTFOO9__VOID_ADDR]] to [80 x double]*
+// CHECK-NEXT: [[DOTFOO10__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 128, i64 720, i8* null)
+// CHECK-NEXT: [[DOTFOO10__ADDR:%.*]] = bitcast i8* [[DOTFOO10__VOID_ADDR]] to [90 x double]*
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast [90 x double]* [[DOTFOO10__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast [80 x double]* [[DOTFOO9__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
+// CHECK-NEXT: [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 4, i8* [[CONV]])
+// CHECK-NEXT: [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
+// CHECK-NEXT: [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* [[CONV1]])
+// CHECK-NEXT: [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
+// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
+// CHECK-NEXT: [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 80, i8* [[CONV2]])
+// CHECK-NEXT: [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
+// CHECK-NEXT: [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
+// CHECK-NEXT: [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
+// CHECK-NEXT: [[DOTBAR5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 4, i8* null)
+// CHECK-NEXT: [[DOTBAR5__ADDR:%.*]] = bitcast i8* [[DOTBAR5__VOID_ADDR]] to float*
+// CHECK-NEXT: [[DOTBAR6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 240, i8* null)
+// CHECK-NEXT: [[DOTBAR6__ADDR:%.*]] = bitcast i8* [[DOTBAR6__VOID_ADDR]] to [30 x double]*
+// CHECK-NEXT: [[TMP6:%.*]] = bitcast [30 x double]* [[DOTBAR6__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP6]], i8* null)
+// CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[DOTBAR5__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP7]], i8* null)
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast i32** [[DOTBAR4__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP8]], i8* null)
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast [20 x i32]* [[DOTBAR3__ADDR]] to i8*
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV3:%.*]] = inttoptr i64 [[TMP10]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP9]], i8* [[CONV3]])
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[DOTBAR2__ADDR]] to i8*
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV4:%.*]] = inttoptr i64 [[TMP12]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP11]], i8* [[CONV4]])
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast i32* [[DOTBAR1__ADDR]] to i8*
+// CHECK-NEXT: [[TMP14:%.*]] = load i64, i64* [[MYALLOC]], align 8
+// CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP14]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP13]], i8* [[CONV5]])
+// CHECK-NEXT: [[TMP15:%.*]] = bitcast [80 x i32]* [[DOTFOO8__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP15]], i8* null)
+// CHECK-NEXT: [[TMP16:%.*]] = bitcast [70 x i32]* [[DOTFOO7__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP16]], i8* inttoptr (i64 8 to i8*))
+// CHECK-NEXT: [[TMP17:%.*]] = bitcast [60 x i32]* [[DOTFOO6__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP17]], i8* inttoptr (i64 2 to i8*))
+// CHECK-NEXT: [[TMP18:%.*]] = bitcast [50 x i32]* [[DOTFOO5__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP18]], i8* inttoptr (i64 3 to i8*))
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast [40 x i32]* [[DOTFOO4__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP19]], i8* inttoptr (i64 4 to i8*))
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast [30 x i32]* [[DOTFOO3__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP20]], i8* inttoptr (i64 5 to i8*))
+// CHECK-NEXT: [[TMP21:%.*]] = bitcast [20 x i32]* [[DOTFOO2__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP21]], i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: [[TMP22:%.*]] = bitcast [10 x i32]* [[DOTFOO1__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP22]], i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT: [[TMP23:%.*]] = bitcast [5 x i32]* [[DOTFOO0__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP23]], i8* null)
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_Z13template_testv
+// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RESULT:%.*]] = alloca double, align 8
+// CHECK-NEXT: [[CALL:%.*]] = call double @_Z3runIdLj1000ELj16EET_v()
+// CHECK-NEXT: store double [[CALL]], double* [[RESULT]], align 8
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_Z3runIdLj1000ELj16EET_v
+// CHECK-SAME: () #[[ATTR2]] comdat {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK-NEXT: [[DOTFOO__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8000, i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: [[DOTFOO__ADDR:%.*]] = bitcast i8* [[DOTFOO__VOID_ADDR]] to [1000 x double]*
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], [1000 x double]* [[DOTFOO__ADDR]], i64 0, i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 16
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast [1000 x double]* [[DOTFOO__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 6 to i8*))
+// CHECK-NEXT: ret double [[TMP1]]
+//
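
A note on the autogenerated checks above: the mangled symbol
_Z3runIdLj1000ELj16EET_v demangles (Itanium ABI) to the instantiation sketched
below, which is why the same 16-byte-aligned allocation of 8000 bytes (1000
doubles) appears in both the 32-bit and 64-bit check blocks:

    // Demangled form of _Z3runIdLj1000ELj16EET_v (illustrative):
    template double run<double, 1000u, 16u>();
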
Index: clang/test/OpenMP/allocate_codegen.cpp
===================================================================
--- clang/test/OpenMP/allocate_codegen.cpp
+++ clang/test/OpenMP/allocate_codegen.cpp
@@ -30,13 +30,13 @@
   KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
 };
 
-struct St{
- int a;
+struct St {
+  int a;
 };
 
-struct St1{
- int a;
- static int b;
+struct St1 {
+  int a;
+  static int b;
 #pragma omp allocate(b) allocator(omp_default_mem_alloc)
 } d;
 
@@ -48,36 +48,49 @@
 template <class T>
 struct ST {
   static T m;
-  #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
+#pragma omp allocate(m) allocator(omp_low_lat_mem_alloc)
 };
 
 template <class T> T foo() {
   T v;
-  #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
+#pragma omp allocate(v) allocator(omp_cgroup_mem_alloc)
   v = ST<T>::m;
   return v;
 }
 
-namespace ns{
-  int a;
+namespace ns {
+int a;
 }
 #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc)
 // CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
 
-// CHECK-LABEL: @main
-int main () {
+int main() {
   static int a;
 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
-  a=2;
-  // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
-  // CHECK: alloca double,
-  // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
+  a = 2;
   double b = 3;
 #pragma omp allocate(b)
   return (foo<int>());
 }
 
+// CHECK-LABEL: define {{[^@]+}}@main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// CHECK-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4
+// CHECK-NEXT: [[DOTB__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP0]], i64 8, i8* null)
+// CHECK-NEXT: [[DOTB__ADDR:%.*]] = bitcast i8* [[DOTB__VOID_ADDR]] to double*
+// CHECK-NEXT: store double 3.000000e+00, double* [[DOTB__ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v()
+// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[DOTB__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP2]]
+
 // CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
 // CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 6 to i8*))
@@ -101,11 +114,11 @@
 // CHECK: [[Z_VOID_PTR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 8, i8* inttoptr (i64 1 to i8*))
 // CHECK: [[Z_ADDR:%.+]] = bitcast i8* [[Z_VOID_PTR]] to float**
 // CHECK: store float* %{{.+}}, float** [[Z_ADDR]],
-#pragma omp allocate(a,z) allocator(omp_default_mem_alloc)
-// CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
-// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
-// CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
-// CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
-// CHECK: ret void
+#pragma omp allocate(a, z) allocator(omp_default_mem_alloc)
+  // CHECK-NEXT: [[Z_VOID_PTR:%.+]] = bitcast float** [[Z_ADDR]] to i8*
+  // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[Z_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+  // CHECK-NEXT: [[A_VOID_PTR:%.+]] = bitcast i32* [[A_ADDR]] to i8*
+  // CHECK: call void @__kmpc_free(i32 [[GTID]], i8* [[A_VOID_PTR]], i8* inttoptr (i64 1 to i8*))
+  // CHECK: ret void
 }
 #endif
Index: clang/test/OpenMP/allocate_codegen_attr.cpp
===================================================================
--- clang/test/OpenMP/allocate_codegen_attr.cpp
+++ clang/test/OpenMP/allocate_codegen_attr.cpp
@@ -65,19 +65,32 @@
 
 // CHECK-NOT: call {{.+}} {{__kmpc_alloc|__kmpc_free}}
 
-// CHECK-LABEL: @main
 int main () {
   static int a;
   [[omp::directive(allocate(a) allocator(omp_thread_mem_alloc))]];
   a=2;
-  // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
-  // CHECK: alloca double,
-  // CHECK-NOT: {{__kmpc_alloc|__kmpc_free}}
   double b = 3;
   [[omp::directive(allocate(b))]];
   return (foo<int>());
 }
 
+// CHECK-LABEL: define {{[^@]+}}@main
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// CHECK-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4
+// CHECK-NEXT: [[DOTB__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP0]], i64 8, i8* null)
+// CHECK-NEXT: [[DOTB__ADDR:%.*]] = bitcast i8* [[DOTB__VOID_ADDR]] to double*
+// CHECK-NEXT: store double 3.000000e+00, double* [[DOTB__ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v()
+// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[DOTB__ADDR]] to i8*
+// CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP2]]
+
 // CHECK: define {{.*}}i32 @{{.+}}foo{{.+}}()
 // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @{{.+}})
 // CHECK-NEXT: [[V_VOID_ADDR:%.+]] = call i8* @__kmpc_alloc(i32 [[GTID]], i64 4, i8* inttoptr (i64 6 to i8*))
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -382,6 +382,8 @@
 __OMP_RTL(__kmpc_doacross_fini, false, Void, IdentPtr, Int32)
 
 __OMP_RTL(__kmpc_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, VoidPtr)
+__OMP_RTL(__kmpc_aligned_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, SizeTy,
+          VoidPtr)
 __OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
 
 __OMP_RTL(__kmpc_init_allocator, false, /* omp_allocator_handle_t */ VoidPtr,
@@ -905,6 +907,8 @@
                 ParamAttrs(NoCaptureAttrs))
 __OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs())
+__OMP_RTL_ATTRS(__kmpc_aligned_alloc, DefaultAttrs, ReturnPtrAttrs,
+                ParamAttrs())
 __OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), ParamAttrs())
 
 __OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs,
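
For reference, the __OMP_RTL entry above fully determines the new entry
point's type: VoidPtr return, then Int32 (global thread id), SizeTy
(alignment), SizeTy (size), and VoidPtr (allocator handle). In C++ terms that
corresponds to a declaration along these lines (a sketch inferred from the
.def entry and the call sites in the tests; parameter names are illustrative,
not taken from the runtime headers):

    // Sketch only; parameter names are assumptions, types follow the
    // __OMP_RTL(__kmpc_aligned_alloc, ...) entry above.
    extern "C" void *__kmpc_aligned_alloc(int gtid, size_t alignment,
                                          size_t size, void *allocator);
    // For comparison, the pre-existing entry point without an explicit
    // alignment argument:
    extern "C" void *__kmpc_alloc(int gtid, size_t size, void *allocator);
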