Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -295,17 +295,13 @@
 static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
                                                   llvm::Value *Ptr,
                                                   CharUnits Align) {
-  llvm::Value *PtrAsInt = Ptr;
   // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align;
-  PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy);
-  PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt,
-        llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1));
-  PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt,
-           llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity()));
-  PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt,
-                                        Ptr->getType(),
-                                        Ptr->getName() + ".aligned");
-  return PtrAsInt;
+  llvm::Value *RoundUp = CGF.Builder.CreateConstInBoundsGEP1_32(
+      CGF.Builder.getInt8Ty(), Ptr, Align.getQuantity() - 1);
+  return CGF.Builder.CreateIntrinsic(
+      llvm::Intrinsic::ptrmask, {CGF.AllocaInt8PtrTy, CGF.IntPtrTy},
+      {RoundUp, llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity())},
+      nullptr, Ptr->getName() + ".aligned");
 }
 
 /// Emit va_arg for a platform using the common void* representation,
Index: clang/test/CodeGen/PowerPC/ppc-varargs-struct.c
===================================================================
--- clang/test/CodeGen/PowerPC/ppc-varargs-struct.c
+++ clang/test/CodeGen/PowerPC/ppc-varargs-struct.c
@@ -37,10 +37,8 @@
 // CHECK-PPC-NEXT:  store i8 8, ptr [[GPRPTR]], align 4
 // CHECK-PPC-NEXT:  [[OVERFLOW_AREA_P:%[0-9]+]] = getelementptr inbounds %struct.__va_list_tag, ptr [[ARRAYDECAY]], i32 0, i32 3
 // CHECK-PPC-NEXT:  [[OVERFLOW_AREA:%.+]] = load ptr, ptr [[OVERFLOW_AREA_P]], align 4
-// CHECK-PPC-NEXT:  %{{[0-9]+}} =  ptrtoint ptr %argp.cur to i32
-// CHECK-PPC-NEXT:  %{{[0-9]+}} = add i32 %{{[0-9]+}}, 7
-// CHECK-PPC-NEXT:  %{{[0-9]+}} = and i32 %{{[0-9]+}}, -8
-// CHECK-PPC-NEXT:  %argp.cur.aligned = inttoptr i32 %{{[0-9]+}} to ptr
+// CHECK-PPC-NEXT: [[GEP_ALIGN:%[0-9]+]] = getelementptr inbounds i8, ptr %argp.cur, i32 7
+// CHECK-PPC-NEXT: %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i32(ptr [[GEP_ALIGN]], i32 -8)
 // CHECK-PPC-NEXT:  [[NEW_OVERFLOW_AREA:%[0-9]+]] = getelementptr inbounds i8, ptr %argp.cur.aligned, i32 4
 // CHECK-PPC-NEXT:  store ptr [[NEW_OVERFLOW_AREA:%[0-9]+]], ptr [[OVERFLOW_AREA_P]], align 4
 // CHECK-PPC-NEXT:  br label %[[CONT]]
@@ -51,7 +49,7 @@
 // CHECK-PPC-NEXT:  call void @llvm.memcpy.p0.p0.i32(ptr align 8 %t, ptr align 8 [[AGGR]], i32 16, i1 false)
 
   int v = va_arg (ap, int);
-  
+
 // CHECK: getelementptr inbounds i8, ptr %{{[a-z.0-9]*}}, i64 4
 // CHECK-PPC:       [[ARRAYDECAY:%[a-z0-9]+]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr %ap, i32 0, i32 0
 // CHECK-PPC-NEXT:  [[GPRPTR:%.+]] = getelementptr inbounds %struct.__va_list_tag, ptr [[ARRAYDECAY]], i32 0, i32 0
Index: clang/test/CodeGen/arm-abi-vector.c
===================================================================
--- clang/test/CodeGen/arm-abi-vector.c
+++ clang/test/CodeGen/arm-abi-vector.c
@@ -16,8 +16,7 @@
 double varargs_vec_2i(int fixed, ...) {
 // CHECK: varargs_vec_2i
 // CHECK: [[VAR:%.*]] = alloca <2 x i32>, align 8
-// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 8
 // CHECK: [[VEC:%.*]] = load <2 x i32>, ptr [[AP_ALIGN]], align 8
 // CHECK: store <2 x i32> [[VEC]], ptr [[VAR]], align 8
@@ -29,8 +28,7 @@
 // APCS-GNU: store <2 x i32> [[VEC]], ptr [[VAR]], align 8
 // ANDROID: varargs_vec_2i
 // ANDROID: [[VAR:%.*]] = alloca <2 x i32>, align 8
-// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 8
 // ANDROID: [[VEC:%.*]] = load <2 x i32>, ptr [[AP_ALIGN]], align 8
 // ANDROID: store <2 x i32> [[VEC]], ptr [[VAR]], align 8
@@ -85,8 +83,7 @@
 double varargs_vec_5c(int fixed, ...) {
 // CHECK: varargs_vec_5c
 // CHECK: [[VAR:%.*]] = alloca <5 x i8>, align 8
-// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 8
 // CHECK: [[VEC:%.*]] = load <5 x i8>, ptr [[AP_ALIGN]], align 8
 // CHECK: store <5 x i8> [[VEC]], ptr [[VAR]], align 8
@@ -98,8 +95,7 @@
 // APCS-GNU: store <5 x i8> [[VEC]], ptr [[VAR]], align 8
 // ANDROID: varargs_vec_5c
 // ANDROID: [[VAR:%.*]] = alloca <5 x i8>, align 8
-// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 8
 // ANDROID: [[VEC:%.*]] = load <5 x i8>, ptr [[AP_ALIGN]], align 8
 // ANDROID: store <5 x i8> [[VEC]], ptr [[VAR]], align 8
@@ -125,8 +121,7 @@
 double varargs_vec_9c(int fixed, ...) {
 // CHECK: varargs_vec_9c
 // CHECK: [[VAR:%.*]] = alloca <9 x i8>, align 16
-// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 16
 // CHECK: [[T0:%.*]] = load <9 x i8>, ptr [[AP_ALIGN]], align 8
 // CHECK: store <9 x i8> [[T0]], ptr [[VAR]], align 16
@@ -138,8 +133,7 @@
 // APCS-GNU: store <9 x i8> [[VEC]], ptr [[VAR]], align 16
 // ANDROID: varargs_vec_9c
 // ANDROID: [[VAR:%.*]] = alloca <9 x i8>, align 16
-// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 16
 // ANDROID: [[T0:%.*]] = load <9 x i8>, ptr [[AP_ALIGN]], align 8
 // ANDROID: store <9 x i8> [[T0]], ptr [[VAR]], align 16
@@ -194,8 +188,7 @@
 double varargs_vec_3s(int fixed, ...) {
 // CHECK: varargs_vec_3s
 // CHECK: alloca <3 x i16>, align 8
-// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 8
 // APCS-GNU: varargs_vec_3s
 // APCS-GNU: [[VAR:%.*]] = alloca <3 x i16>, align 8
@@ -204,8 +197,7 @@
 // APCS-GNU: [[VEC:%.*]] = load <3 x i16>, ptr [[AP]], align 4
 // ANDROID: varargs_vec_3s
 // ANDROID: alloca <3 x i16>, align 8
-// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 8
   va_list ap;
   double sum = fixed;
@@ -229,8 +221,7 @@
 double varargs_vec_5s(int fixed, ...) {
 // CHECK: varargs_vec_5s
 // CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
-// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 16
 // CHECK: [[VEC:%.*]] = load <5 x i16>, ptr [[AP_ALIGN]], align 8
 // CHECK: store <5 x i16> [[VEC]], ptr [[VAR_ALIGN]], align 16
@@ -241,8 +232,7 @@
 // APCS-GNU: [[VEC:%.*]] = load <5 x i16>, ptr [[AP]], align 4
 // ANDROID: varargs_vec_5s
 // ANDROID: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
-// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 16
 // ANDROID: [[VEC:%.*]] = load <5 x i16>, ptr [[AP_ALIGN]], align 8
 // ANDROID: store <5 x i16> [[VEC]], ptr [[VAR_ALIGN]], align 16
@@ -274,16 +264,14 @@
 
 double varargs_struct(int fixed, ...) {
 // CHECK: varargs_struct
-// CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 16
 // APCS-GNU: varargs_struct
 // APCS-GNU: [[VAR_ALIGN:%.*]] = alloca %struct.StructWithVec
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr {{%.*}}, i32 16
 // APCS-GNU: call void @llvm.memcpy
 // ANDROID: varargs_struct
-// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
-// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to ptr
+// ANDROID: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr {{%.*}}, i32 -8)
 // ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i32 16
   va_list ap;
   double sum = fixed;
Index: clang/test/CodeGen/arm-varargs.c
===================================================================
--- clang/test/CodeGen/arm-varargs.c
+++ clang/test/CodeGen/arm-varargs.c
@@ -41,10 +41,8 @@
 // CHECK-LABEL: define{{.*}} void @simple_aligned_struct(ptr noalias sret(%struct.aligned_bigstruct) align 8 %agg.result)
   return va_arg(the_list, struct aligned_bigstruct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 16
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 8 %agg.result, ptr align 8 [[CUR_ALIGNED]], i32 16, i1 false)
@@ -55,10 +53,8 @@
 // CHECK-LABEL: define{{.*}} double @simple_double
   return va_arg(the_list, double);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 8
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: [[RESULT:%[a-z0-9._]+]] = load double, ptr [[CUR_ALIGNED]]
@@ -110,10 +106,8 @@
 // CHECK-LABEL: define{{.*}} i64 @underaligned_long_long_test()
   return va_arg(the_list, underaligned_long_long);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 8
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: [[RESULT:%[a-z0-9._]+]] = load i64, ptr [[CUR_ALIGNED]]
@@ -125,10 +119,8 @@
 // CHECK-LABEL: define{{.*}} i64 @overaligned_long_long_test()
   return va_arg(the_list, overaligned_long_long);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 8
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: [[RESULT:%[a-z0-9._]+]] = load i64, ptr [[CUR_ALIGNED]]
@@ -196,10 +188,8 @@
 // CHECK-LABEL: define{{.*}} void @overaligned_long_long_struct_test(ptr noalias sret(%struct.overaligned_long_long_struct) align 16 %agg.result)
   return va_arg(the_list, overaligned_long_long_struct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 16
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 %agg.result, ptr align 8 [[CUR_ALIGNED]], i32 16, i1 false)
@@ -232,10 +222,8 @@
 // CHECK-LABEL: define{{.*}} void @overaligned_int_struct_member_test(ptr noalias sret(%struct.overaligned_int_struct_member) align 16 %agg.result)
   return va_arg(the_list, overaligned_int_struct_member);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 16
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 %agg.result, ptr align 8 [[CUR_ALIGNED]], i32 16, i1 false)
@@ -262,10 +250,8 @@
 // CHECK-LABEL: define{{.*}} void @overaligned_long_long_struct_member_test(ptr noalias sret(%struct.overaligned_long_long_struct_member) align 16 %agg.result)
   return va_arg(the_list, overaligned_long_long_struct_member);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load ptr, ptr @the_list, align 4
-// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint ptr [[CUR]] to i32
-// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
-// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
-// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to ptr
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR]], i32 7
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CUR_INT_ADD]], i32 -8)
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, ptr [[CUR_ALIGNED]], i32 16
 // CHECK: store ptr [[NEXT]], ptr @the_list, align 4
 // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 16 %agg.result, ptr align 8 [[CUR_ALIGNED]], i32 16, i1 false)
Index: clang/test/CodeGen/arm64-abi-vector.c
===================================================================
--- clang/test/CodeGen/arm64-abi-vector.c
+++ clang/test/CodeGen/arm64-abi-vector.c
@@ -94,8 +94,9 @@
 double varargs_vec_9c(int fixed, ...) {
 // CHECK: varargs_vec_9c
 // CHECK: alloca <9 x i8>, align 16
-// CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to ptr
+// CHECK: [[AP:%.*]] = load ptr, ptr %ap, align 8
+// CHECK: [[AP_ADD:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 15
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[AP_ADD]], i64 -16)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i64 16
   va_list ap;
   double sum = fixed;
@@ -153,8 +154,9 @@
 double varargs_vec_5s(int fixed, ...) {
 // CHECK: varargs_vec_5s
 // CHECK: alloca <5 x i16>, align 16
-// CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to ptr
+// CHECK: [[AP:%.*]] = load ptr, ptr %ap, align 8
+// CHECK: [[AP_ADD:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 15
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[AP_ADD]], i64 -16)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i64 16
   va_list ap;
   double sum = fixed;
@@ -174,8 +176,9 @@
 double varargs_vec_3i(int fixed, ...) {
 // CHECK: varargs_vec_3i
 // CHECK: alloca <3 x i32>, align 16
-// CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to ptr
+// CHECK: [[AP:%.*]] = load ptr, ptr %ap, align 8
+// CHECK: [[AP_ADD:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 15
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[AP_ADD]], i64 -16)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i64 16
   va_list ap;
   double sum = fixed;
@@ -244,8 +247,11 @@
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_CUR:%.*]], i64 8
   sum = sum + c5.x + c5.y;
   __char9 c9 = va_arg(ap, __char9);
-// CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to ptr
+
+
+// CHECK: [[AP:%.*]] = load ptr, ptr %ap, align 8
+// CHECK: [[AP_ADD:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 15
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[AP_ADD]], i64 -16)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i64 16
   sum = sum + c9.x + c9.y;
   __char19 c19 = va_arg(ap, __char19);
@@ -256,13 +262,17 @@
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_CUR:%.*]], i64 8
   sum = sum + s3.x + s3.y;
   __short5 s5 = va_arg(ap, __short5);
-// CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to ptr
+
+// CHECK: [[AP:%.*]] = load ptr, ptr %ap, align 8
+// CHECK: [[AP_ADD:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 15
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[AP_ADD]], i64 -16)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i64 16
   sum = sum + s5.x + s5.y;
   __int3 i3 = va_arg(ap, __int3);
-// CHECK: [[ALIGN:%.*]] = and i64 {{%.*}}, -16
-// CHECK: [[AP_ALIGN:%.*]] = inttoptr i64 [[ALIGN]] to ptr
+
+// CHECK: [[AP:%.*]] = load ptr, ptr %ap, align 8
+// CHECK: [[AP_ADD:%.*]] = getelementptr inbounds i8, ptr [[AP]], i32 15
+// CHECK: [[AP_ALIGN:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[AP_ADD]], i64 -16)
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[AP_ALIGN]], i64 16
   sum = sum + i3.x + i3.y;
   __int5 i5 = va_arg(ap, __int5);
Index: clang/test/CodeGen/x86_32-align-linux.c
===================================================================
--- clang/test/CodeGen/x86_32-align-linux.c
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -9,10 +9,8 @@
 
 // CHECK-LABEL: define dso_local void @testm128
 // CHECK-LABEL: %argp.cur = load ptr, ptr %args, align 4
-// CHECK-NEXT:  %0 = ptrtoint ptr %argp.cur to i32
-// CHECK-NEXT:  %1 = add i32 %0, 15
-// CHECK-NEXT:  %2 = and i32 %1, -16
-// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to ptr
+// CHECK-NEXT: %0 = getelementptr inbounds i8, ptr %argp.cur, i32 15
+// CHECK-NEXT: %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i32(ptr %0, i32 -16)
 void testm128(int argCount, ...) {
   __m128 res;
   __builtin_va_list args;
@@ -23,10 +21,8 @@
 
 // CHECK-LABEL: define dso_local void @testm256
 // CHECK-LABEL: %argp.cur = load ptr, ptr %args, align 4
-// CHECK-NEXT:  %0 = ptrtoint ptr %argp.cur to i32
-// CHECK-NEXT:  %1 = add i32 %0, 31
-// CHECK-NEXT:  %2 = and i32 %1, -32
-// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to ptr
+// CHECK-NEXT: %0 = getelementptr inbounds i8, ptr %argp.cur, i32 31
+// CHECK-NEXT: %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i32(ptr %0, i32 -32)
 void testm256(int argCount, ...) {
   __m256 res;
   __builtin_va_list args;
@@ -37,10 +33,8 @@
 
 // CHECK-LABEL: define dso_local void @testm512
 // CHECK-LABEL: %argp.cur = load ptr, ptr %args, align 4
-// CHECK-NEXT:  %0 = ptrtoint ptr %argp.cur to i32
-// CHECK-NEXT:  %1 = add i32 %0, 63
-// CHECK-NEXT:  %2 = and i32 %1, -64
-// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to ptr
+// CHECK-NEXT: %0 = getelementptr inbounds i8, ptr %argp.cur, i32 63
+// CHECK-NEXT: %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i32(ptr %0, i32 -64)
 void testm512(int argCount, ...) {
   __m512 res;
   __builtin_va_list args;