Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -12252,6 +12252,31 @@
   return AllocVal;
 }
 
+/// Given the allocate directive list item type and align clause value,
+/// return the appropriate alignment.
+static llvm::Value *getAlignmentValue(CodeGenFunction &CGF, QualType ListItemTy,
+                                      const Expr *Alignment) {
+  llvm::Value *AlignVal = nullptr;
+  if (Alignment) {
+    unsigned UserAlign =
+        Alignment->EvaluateKnownConstInt(CGF.getContext()).getExtValue();
+
+    // OpenMP 5.1, pg. 185, lines 7-10:
+    //   Each item in the align modifier list must be aligned to the maximum
+    //   of the specified alignment and the type's natural alignment.
+    //
+    // If no alignment is specified, use the natural alignment.
+    CharUnits NaturalAlign = CGF.CGM.getNaturalTypeAlignment(ListItemTy);
+    bool UseNaturalAlign =
+        !UserAlign || UserAlign <= NaturalAlign.getQuantity();
+    AlignVal =
+        UseNaturalAlign
+            ? llvm::ConstantInt::get(CGF.CGM.SizeTy, NaturalAlign.getQuantity())
+            : llvm::ConstantInt::get(CGF.CGM.SizeTy, UserAlign);
+  }
+  return AlignVal;
+}
+
 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                    const VarDecl *VD) {
   if (!VD)
@@ -12291,10 +12316,7 @@
       const Expr *Allocator = AA->getAllocator();
       llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
       llvm::Value *Alignment =
-          AA->getAlignment()
-              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
-                                          CGM.SizeTy, /*isSigned=*/false)
-              : nullptr;
+          getAlignmentValue(CGF, VD->getType(), AA->getAlignment());
       SmallVector<llvm::Value *, 4> Args;
       Args.push_back(ThreadID);
       if (Alignment)
Index: clang/test/OpenMP/align_clause_codegen.cpp
===================================================================
--- clang/test/OpenMP/align_clause_codegen.cpp
+++ clang/test/OpenMP/align_clause_codegen.cpp
@@ -85,9 +85,9 @@
 // CHECK-32-NEXT:  entry:
 // CHECK-32-NEXT:    [[MYALLOC:%.*]] = alloca i32, align 4
 // CHECK-32-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 1, i32 20, i8* null)
+// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 20, i8* null)
 // CHECK-32-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* inttoptr (i32 7 to i8*))
+// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* inttoptr (i32 7 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
 // CHECK-32-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -114,15 +114,15 @@
 // CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
 // CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 4, i8* [[CONV]])
+// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, i8* [[CONV]])
 // CHECK-32-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 40, i8* [[CONV1]])
+// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* [[CONV1]])
 // CHECK-32-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
 // CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
-// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 2, i32 80, i8* [[CONV2]])
+// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* [[CONV2]])
 // CHECK-32-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-32-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
 // CHECK-32-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
@@ -196,9 +196,9 @@
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[MYALLOC:%.*]] = alloca i64, align 8
 // CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
-// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 1, i64 32, i8* null)
+// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 32, i8* null)
 // CHECK-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
-// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* inttoptr (i64 7 to i8*))
+// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* inttoptr (i64 7 to i8*))
 // CHECK-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
 // CHECK-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
@@ -225,15 +225,15 @@
 // CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
 // CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
-// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 4, i8* [[CONV]])
+// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, i8* [[CONV]])
 // CHECK-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
 // CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
-// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 48, i8* [[CONV1]])
+// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* [[CONV1]])
 // CHECK-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
 // CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
 // CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
-// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 2, i64 80, i8* [[CONV2]])
+// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* [[CONV2]])
 // CHECK-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
 // CHECK-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
 // CHECK-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
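
Note (illustration, not part of the patch): the new getAlignmentValue helper implements the OpenMP 5.1 rule that the runtime allocation alignment is the maximum of the align clause value and the list item type's natural alignment. A minimal sketch of the kind of source that exercises this path follows; it assumes a 64-bit host, and the variable names are hypothetical rather than taken from align_clause_codegen.cpp:

    #include <omp.h>

    void bar() {
      int foo1;
      int foo2;
      // 'int' has natural alignment 4, so align(2) is below it and the
      // emitted __kmpc_aligned_alloc receives an alignment argument of 4;
      // align(16) exceeds it and is passed through unchanged as 16.
    #pragma omp allocate(foo1) allocator(omp_pteam_mem_alloc) align(2)
    #pragma omp allocate(foo2) allocator(omp_pteam_mem_alloc) align(16)
    }

This is what the updated CHECK lines above assert: for int-typed list items, the alignment argument of __kmpc_aligned_alloc changes from the raw clause value (1 or 2 in the old lines) to the natural alignment of 4, while clause values already at or above the natural alignment (4 and 16) are unchanged.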