diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5083,13 +5083,16 @@
   if (const ComplexType *CTy = Ty->getAs<ComplexType>())
     Ty = CTy->getElementType();
 
+  auto FloatUsesVector = [this](QualType Ty){
+    return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics(
+                                           Ty) == &llvm::APFloat::IEEEquad();
+  };
+
   // Only vector types of size 16 bytes need alignment (larger types are
   // passed via reference, smaller types are not aligned).
   if (Ty->isVectorType()) {
     return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8);
-  } else if (Ty->isRealFloatingType() &&
-             &getContext().getFloatTypeSemantics(Ty) ==
-                 &llvm::APFloat::IEEEquad()) {
+  } else if (FloatUsesVector(Ty)) {
     // According to ABI document section 'Optional Save Areas': If extended
     // precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION
     // format are supported, map them to a single quadword, quadword aligned.
@@ -5116,7 +5119,9 @@
 
   // With special case aggregates, only vector base types need alignment.
   if (AlignAsType) {
-    return CharUnits::fromQuantity(AlignAsType->isVectorType() ? 16 : 8);
+    bool UsesVector = AlignAsType->isVectorType() ||
+                      FloatUsesVector(QualType(AlignAsType, 0));
+    return CharUnits::fromQuantity(UsesVector ? 16 : 8);
   }
 
   // Otherwise, we only need alignment for any aggregate type that
diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/ppc64le-varargs-f128.c
--- a/clang/test/CodeGen/ppc64le-varargs-f128.c
+++ b/clang/test/CodeGen/ppc64le-varargs-f128.c
@@ -107,12 +107,16 @@
 // IEEE: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8*
 // IEEE: call void @llvm.va_start(i8* %[[AP1]])
 // IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]]
-// IEEE: %[[V0:[0-9a-zA-Z_.]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16
+// IEEE: %[[P0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64
+// IEEE: %[[P1:[0-9a-zA-Z_.]+]] = add i64 %[[P0]], 15
+// IEEE: %[[P2:[0-9a-zA-Z_.]+]] = and i64 %[[P1]], -16
+// IEEE: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[P2]] to i8*
+// IEEE: %[[V0:[0-9a-zA-Z_.]+]] = getelementptr inbounds i8, i8* %[[ALIGN]], i64 16
 // IEEE: store i8* %[[V0]], i8** %[[AP]], align 8
-// IEEE: %[[V1:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to %struct.ldbl128_s*
+// IEEE: %[[V1:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to %struct.ldbl128_s*
 // IEEE: %[[V2:[0-9a-zA-Z_.]+]] = bitcast %struct.ldbl128_s* %[[TMP:[0-9a-zA-Z_.]+]] to i8*
 // IEEE: %[[V3:[0-9a-zA-Z_.]+]] = bitcast %struct.ldbl128_s* %[[V1]] to i8*
-// IEEE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %[[V2]], i8* align 8 %[[V3]], i64 16, i1 false)
+// IEEE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %[[V2]], i8* align 16 %[[V3]], i64 16, i1 false)
 // IEEE: %[[COERCE:[0-9a-zA-Z_.]+]] = getelementptr inbounds %struct.ldbl128_s, %struct.ldbl128_s* %[[TMP]], i32 0, i32 0
 // IEEE: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[COERCE]], align 16
 // IEEE: call void @foo_ls(fp128 inreg %[[V4]])