Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -12252,6 +12252,31 @@
   return AllocVal;
 }
 
+/// Given the allocate directive list item type and align clause value,
+/// return the appropriate alignment.
+static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy,
+                                      const Expr *Alignment) {
+  llvm::Value *AlignVal = nullptr;
+  if (Alignment) {
+    unsigned UserAlign =
+        Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue();
+
+    // OpenMP 5.1, pg. 185, lines 7-10:
+    //   Each item in the align modifier list must be aligned to the maximum
+    //   of the specified alignment and the type's natural alignment.
+    //
+    // If no alignment is specified, use the natural alignment.
+    CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy);
+    bool UseNaturalAlign =
+        !UserAlign || UserAlign <= NaturalAlign.getQuantity();
+    AlignVal =
+        UseNaturalAlign
+            ? llvm::ConstantInt::get(CGF.CGM.SizeTy, NaturalAlign.getQuantity())
+            : llvm::ConstantInt::get(CGF.CGM.SizeTy, UserAlign);
+  }
+  return AlignVal;
+}
+
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
   if (!VD)
@@ -12291,10 +12316,7 @@
       const Expr *Allocator = AA->getAllocator();
       llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
       llvm::Value *Alignment =
-          AA->getAlignment()
-              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
-                                          CGM.SizeTy, /*isSigned=*/false)
-              : nullptr;
+          getAlignmentValue(CGF, VD->getType(), AA->getAlignment());
       SmallVector<llvm::Value *, 4> Args;
       Args.push_back(ThreadID);
       if (Alignment)
Index: clang/test/OpenMP/align_clause_codegen.cpp
===================================================================
--- clang/test/OpenMP/align_clause_codegen.cpp
+++ clang/test/OpenMP/align_clause_codegen.cpp
@@ -85,9 +85,9 @@
 // CHECK-32-NEXT:  entry:
 // CHECK-32-NEXT:    [[MYALLOC:%.*]] = alloca i32, align 4
 // CHECK-32-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 1, i32 20, i8* null)
+// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 20, i8* null)
 // CHECK-32-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* inttoptr (i32 7 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -114,15 +114,15 @@
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 4, i8* [[CONV]])
+// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, i8* [[CONV]])
 // CHECK-32-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* [[CONV1]])
+// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* [[CONV1]])
 // CHECK-32-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 80, i8* [[CONV2]])
+// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* [[CONV2]])
 // CHECK-32-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-32-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
 // CHECK-32-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
@@ -196,9 +196,9 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[MYALLOC:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 1, i64 32, i8* null)
+// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 32, i8* null)
 // CHECK-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* inttoptr (i64 7 to i8*))
 // CHECK-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
 // CHECK-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -225,15 +225,15 @@
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
 // CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
-// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 4, i8* [[CONV]])
+// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, i8* [[CONV]])
 // CHECK-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
-// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* [[CONV1]])
+// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* [[CONV1]])
 // CHECK-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
-// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 80, i8* [[CONV2]])
+// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* [[CONV2]])
 // CHECK-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
 // CHECK-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
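
Note (illustration, not part of the patch): the new getAlignmentValue helper implements the OpenMP 5.1 rule that the runtime allocation alignment is the maximum of the align clause value and the list item type's natural alignment. A minimal sketch of the kind of source that exercises this path follows; it assumes a 64-bit host, and the variable names are hypothetical rather than taken from align_clause_codegen.cpp:

    #include <omp.h>

    void bar() {
      int foo1;
      int foo2;
      // 'int' has natural alignment 4, so align(2) is below it and the
      // emitted __kmpc_aligned_alloc receives an alignment argument of 4;
      // align(16) exceeds it and is passed through unchanged as 16.
    #pragma omp allocate(foo1) allocator(omp_pteam_mem_alloc) align(2)
    #pragma omp allocate(foo2) allocator(omp_pteam_mem_alloc) align(16)
    }

This is what the updated CHECK lines above assert: for int-typed list items, the alignment argument of __kmpc_aligned_alloc changes from the raw clause value (1 or 2 in the old lines) to the natural alignment of 4, while clause values already at or above the natural alignment (4 and 16) are unchanged.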