Index: lib/CodeGen/ABIInfo.h
===================================================================
--- lib/CodeGen/ABIInfo.h
+++ lib/CodeGen/ABIInfo.h
@@ -73,6 +73,15 @@
     // abstract this out.
     virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGen::CodeGenFunction &CGF) const = 0;
+
+    virtual bool isHomogeneousAggregateBaseType(QualType Ty) const;
+
+    virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                   uint64_t Members) const;
+
+    bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                uint64_t &Members) const;
+
   };
 }  // end namespace clang
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -575,6 +575,27 @@
 };
 }  // namespace
 
+// Collect the fields of a record. Check for non-virtual bases and recursively
+// add their fields first. Doesn't work on dynamic classes.
+void addFields(const RecordDecl *RD,
+               SmallVectorImpl<const FieldDecl *> &Fields) {
+  if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+    // This code assumes that layout order is the same as base specifier order,
+    // which is true for non-dynamic classes in all supported ABIs.
+    assert(!CXXRD->isDynamicClass() && "can't expand dynamic class");
+    for (const CXXBaseSpecifier &BS : CXXRD->bases()) {
+      const CXXRecordDecl *Base = BS.getType()->getAsCXXRecordDecl();
+      addFields(Base, Fields);
+    }
+  }
+
+  for (const auto *FD : RD->fields()) {
+    assert(!FD->isBitField() &&
+           "Cannot expand structure with bit-field members.");
+    Fields.push_back(FD);
+  }
+}
+
 static std::unique_ptr<TypeExpansion>
 getTypeExpansion(QualType Ty, const ASTContext &Context) {
   if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
@@ -604,11 +625,7 @@
       if (LargestFD)
         Fields.push_back(LargestFD);
     } else {
-      for (const auto *FD : RD->fields()) {
-        assert(!FD->isBitField() &&
-               "Cannot expand structure with bit-field members.");
-        Fields.push_back(FD);
-      }
+      addFields(RD, Fields);
     }
     return llvm::make_unique<RecordExpansion>(std::move(Fields));
   }
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -85,6 +85,15 @@
   return CGT.getTarget();
 }
 
+bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  return false;
+}
+
+bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                uint64_t Members) const {
+  return false;
+}
+
 void ABIArgInfo::dump() const {
   raw_ostream &OS = llvm::errs();
   OS << "(ABIArgInfo Kind=";
@@ -3044,12 +3053,14 @@
 
   bool isPromotableTypeForABI(QualType Ty) const;
   bool isAlignedParamType(QualType Ty) const;
-  bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                              uint64_t &Members) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
 
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   // TODO: We can add more logic to computeInfo to improve performance.
   // Example: For aggregate arguments that fit in a register, we could
   // use getDirectInReg (as is done below for structs containing a single
@@ -3192,9 +3203,8 @@
 /// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
 /// aggregate.  Base is set to the base element type, and Members is set
 /// to the number of base elements.
-bool
-PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                           uint64_t &Members) const {
+bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                     uint64_t &Members) const {
   if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
     uint64_t NElements = AT->getSize().getZExtValue();
     if (NElements == 0)
@@ -3263,19 +3273,9 @@
     Ty = CT->getElementType();
   }
 
-  // Homogeneous aggregates for ELFv2 must have base types of float,
-  // double, long double, or 128-bit vectors.
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() != BuiltinType::Float &&
-        BT->getKind() != BuiltinType::Double &&
-        BT->getKind() != BuiltinType::LongDouble)
-      return false;
-  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
-    if (getContext().getTypeSize(VT) != 128)
-      return false;
-  } else {
+  // Most ABIs only support float, double, and some vector type widths.
+  if (!isHomogeneousAggregateBaseType(Ty))
     return false;
-  }
 
   // The base type must be the same for all members.  Types that
   // agree in both total size and mode (float vs. vector) are
@@ -3288,14 +3288,34 @@
         getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
       return false;
   }
+  return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
+}
+
+bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // Homogeneous aggregates for ELFv2 must have base types of float,
+  // double, long double, or 128-bit vectors.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Float ||
+        BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble)
+      return true;
+  }
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    if (getContext().getTypeSize(VT) == 128)
+      return true;
+  }
+  return false;
+}
+bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
+    const Type *Base, uint64_t Members) const {
 
   // Vector types require one register, floating point types require one
   // or two registers depending on their size.
-  uint32_t NumRegs = Base->isVectorType() ? 1 :
-                       (getContext().getTypeSize(Base) + 63) / 64;
+  uint32_t NumRegs =
+      Base->isVectorType() ? 1 : (getContext().getTypeSize(Base) + 63) / 64;
 
   // Homogeneous Aggregates may occupy at most 8 registers.
-  return (Members > 0 && Members * NumRegs <= 8);
+  return Members * NumRegs <= 8;
 }
 
 ABIArgInfo
@@ -3586,6 +3606,10 @@
   ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
                                   bool &IsHA, unsigned &AllocatedGPR,
                                   bool &IsSmallAggr, bool IsNamedArg) const;
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const {
@@ -3681,11 +3705,6 @@
 };
 }
 
-static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                      ASTContext &Context,
-                                      bool isAArch64,
-                                      uint64_t *HAMembers = nullptr);
-
 ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
                                                 unsigned &AllocatedVFP,
                                                 bool &IsHA,
@@ -3765,7 +3784,7 @@
   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
const Type *Base = nullptr; uint64_t Members = 0; - if (isARMHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) { + if (isHomogeneousAggregate(Ty, Base, Members)) { IsHA = true; if (!IsNamedArg && isDarwinPCS()) { // With the Darwin ABI, variadic arguments are always passed on the stack @@ -3823,7 +3842,8 @@ return ABIArgInfo::getIgnore(); const Type *Base = nullptr; - if (isARMHomogeneousAggregate(RetTy, Base, getContext(), true)) + uint64_t Members = 0; + if (isHomogeneousAggregate(RetTy, Base, Members)) // Homogeneous Floating-point Aggregates (HFAs) are returned directly. return ABIArgInfo::getDirect(); @@ -3851,9 +3871,35 @@ return false; } -static llvm::Value *EmitAArch64VAArg(llvm::Value *VAListAddr, QualType Ty, - int AllocatedGPR, int AllocatedVFP, - bool IsIndirect, CodeGenFunction &CGF) { +bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for AAPCS64 must have base types of a floating + // point type or a short-vector type. This is the same as the 32-bit ABI, + // but with the difference that any floating-point type is allowed, + // including __fp16. 
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->isFloatingPoint())
+      return true;
+  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    unsigned VecSize = getContext().getTypeSize(VT);
+    if (VecSize == 64 || VecSize == 128)
+      return true;
+  }
+  return false;
+}
+
+bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                       uint64_t Members) const {
+  return Members <= 4;
+}
+
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
+  bool IsHA = false, IsSmallAggr = false;
+  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
+                                       IsSmallAggr, false /*IsNamedArg*/);
+  bool IsIndirect = AI.isIndirect();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -3959,8 +4005,8 @@
   }
 
   const Type *Base = nullptr;
-  uint64_t NumMembers;
-  bool IsHFA = isARMHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers);
+  uint64_t NumMembers = 0;
+  bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
   if (IsHFA && NumMembers > 1) {
     // Homogeneous aggregates passed in registers will have their elements split
     // and stored 16-bytes apart regardless of size (they're notionally in qN,
@@ -4079,18 +4125,6 @@
   return ResAddr;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                            CodeGenFunction &CGF) const {
-
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
-
-  return EmitAArch64VAArg(VAListAddr, Ty, AllocatedGPR, AllocatedVFP,
-                          AI.isIndirect(), CGF);
-}
-
 llvm::Value *AArch64ABIInfo::EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
                                              CodeGenFunction &CGF) const {
   // We do not support va_arg for aggregates or illegal vector types.
@@ -4103,7 +4137,8 @@
   uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
 
   const Type *Base = nullptr;
-  bool isHA = isARMHomogeneousAggregate(Ty, Base, getContext(), true);
+  uint64_t Members = 0;
+  bool isHA = isHomogeneousAggregate(Ty, Base, Members);
 
   bool isIndirect = false;
   // Arguments bigger than 16 bytes which aren't homogeneous aggregates should
@@ -4210,6 +4245,10 @@
                           bool &IsCPRC) const;
   bool isIllegalVectorType(QualType Ty) const;
 
+  bool isHomogeneousAggregateBaseType(QualType Ty) const override;
+  bool isHomogeneousAggregateSmallEnough(const Type *Ty,
+                                         uint64_t Members) const override;
+
   void computeInfo(CGFunctionInfo &FI) const override;
 
   llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -4389,101 +4428,6 @@
   RuntimeCC = abiCC;
 }
 
-/// isARMHomogeneousAggregate - Return true if a type is an AAPCS-VFP homogeneous
-/// aggregate.  If HAMembers is non-null, the number of base elements
-/// contained in the type is returned through it; this is used for the
-/// recursive calls that check aggregate component types.
-static bool isARMHomogeneousAggregate(QualType Ty, const Type *&Base,
-                                      ASTContext &Context, bool isAArch64,
-                                      uint64_t *HAMembers) {
-  uint64_t Members = 0;
-  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
-    if (!isARMHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members))
-      return false;
-    Members *= AT->getSize().getZExtValue();
-  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
-    const RecordDecl *RD = RT->getDecl();
-    if (RD->hasFlexibleArrayMember())
-      return false;
-
-    Members = 0;
-    for (const auto *FD : RD->fields()) {
-      uint64_t FldMembers;
-      if (!isARMHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers))
-        return false;
-
-      Members = (RD->isUnion() ?
-                 std::max(Members, FldMembers) : Members + FldMembers);
-    }
-  } else {
-    Members = 1;
-    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
-      Members = 2;
-      Ty = CT->getElementType();
-    }
-
-    // Homogeneous aggregates for AAPCS-VFP must have base types of float,
-    // double, or 64-bit or 128-bit vectors. "long double" has the same machine
-    // type as double, so it is also allowed as a base type.
-    // Homogeneous aggregates for AAPCS64 must have base types of a floating
-    // point type or a short-vector type. This is the same as the 32-bit ABI,
-    // but with the difference that any floating-point type is allowed,
-    // including __fp16.
-    if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-      if (isAArch64) {
-        if (!BT->isFloatingPoint())
-          return false;
-      } else {
-        if (BT->getKind() != BuiltinType::Float &&
-            BT->getKind() != BuiltinType::Double &&
-            BT->getKind() != BuiltinType::LongDouble)
-          return false;
-      }
-    } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
-      unsigned VecSize = Context.getTypeSize(VT);
-      if (VecSize != 64 && VecSize != 128)
-        return false;
-    } else {
-      return false;
-    }
-
-    // The base type must be the same for all members.  Vector types of the
-    // same total size are treated as being equivalent here.
-    const Type *TyPtr = Ty.getTypePtr();
-    if (!Base)
-      Base = TyPtr;
-
-    if (Base != TyPtr) {
-      // Homogeneous aggregates are defined as containing members with the
-      // same machine type. There are two cases in which two members have
-      // different TypePtrs but the same machine type:
-
-      // 1) Vectors of the same length, regardless of the type and number
-      //    of their members.
-      const bool SameLengthVectors = Base->isVectorType() && TyPtr->isVectorType()
-        && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr));
-
-      // 2) In the 32-bit AAPCS, `double' and `long double' have the same
-      //    machine type. This is not the case for the 64-bit AAPCS.
- const bool SameSizeDoubles = - ( ( Base->isSpecificBuiltinType(BuiltinType::Double) - && TyPtr->isSpecificBuiltinType(BuiltinType::LongDouble)) - || ( Base->isSpecificBuiltinType(BuiltinType::LongDouble) - && TyPtr->isSpecificBuiltinType(BuiltinType::Double))) - && (Context.getTypeSize(Base) == Context.getTypeSize(TyPtr)); - - if (!SameLengthVectors && !SameSizeDoubles) - return false; - } - } - - // Homogeneous Aggregates can have at most 4 members of the base type. - if (HAMembers) - *HAMembers = Members; - - return (Members > 0 && Members <= 4); -} - /// markAllocatedVFPs - update VFPRegs according to the alignment and /// number of VFP registers (unit is S register) requested. void ARMABIInfo::markAllocatedVFPs(unsigned Alignment, @@ -4640,7 +4584,7 @@ // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; - if (isARMHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) { + if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. if (Base->isVectorType()) { @@ -4845,7 +4789,8 @@ // Check for homogeneous aggregates with AAPCS-VFP. if (getABIKind() == AAPCS_VFP && !isVariadic) { const Type *Base = nullptr; - if (isARMHomogeneousAggregate(RetTy, Base, getContext(), false)) { + uint64_t Members; + if (isHomogeneousAggregate(RetTy, Base, Members)) { assert(Base && "Base class should be set for homogeneous aggregate"); // Homogeneous Aggregates are returned directly. return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP); @@ -4891,6 +4836,27 @@ return false; } +bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { + // Homogeneous aggregates for AAPCS-VFP must have base types of float, + // double, or 64-bit or 128-bit vectors. 
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Float ||
+        BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble)
+      return true;
+  } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    unsigned VecSize = getContext().getTypeSize(VT);
+    if (VecSize == 64 || VecSize == 128)
+      return true;
+  }
+  return false;
+}
+
+bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
+                                                   uint64_t Members) const {
+  return Members <= 4;
+}
+
 llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGenFunction &CGF) const {
   llvm::Type *BP = CGF.Int8PtrTy;
Index: test/CodeGen/ppc64le-aggregates-cpp.cpp
===================================================================
--- test/CodeGen/ppc64le-aggregates-cpp.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// REQUIRES: powerpc-registered-target
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
-
-// Test that C++ classes are correctly classified as homogeneous aggregates.
- -struct Base1 { - int x; -}; -struct Base2 { - double x; -}; -struct Base3 { - double x; -}; -struct D1 : Base1 { // non-homogeneous aggregate - double y, z; -}; -struct D2 : Base2 { // homogeneous aggregate - double y, z; -}; -struct D3 : Base1, Base2 { // non-homogeneous aggregate - double y, z; -}; -struct D4 : Base2, Base3 { // homogeneous aggregate - double y, z; -}; - -// CHECK: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce) -D1 func_D1(D1 x) { return x; } - -// CHECK: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce) -D2 func_D2(D2 x) { return x; } - -// CHECK: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce) -D3 func_D3(D3 x) { return x; } - -// CHECK: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce) -D4 func_D4(D4 x) { return x; } - Index: test/CodeGen/ppc64le-aggregates.c =================================================================== --- test/CodeGen/ppc64le-aggregates.c +++ test/CodeGen/ppc64le-aggregates.c @@ -1,4 +1,3 @@ -// REQUIRES: powerpc-registered-target // RUN: %clang_cc1 -faltivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s // Test homogeneous float aggregate passing and returning. 
@@ -16,6 +15,8 @@
 struct fab { float a; float b; };
 struct fabc { float a; float b; float c; };
 
+struct f2a2b { float a[2]; float b[2]; };
+
 // CHECK: define [1 x float] @func_f1(float inreg %x.coerce)
 struct f1 func_f1(struct f1 x) { return x; }
 
@@ -49,6 +50,9 @@
 // CHECK: define [3 x float] @func_fabc([3 x float] %x.coerce)
 struct fabc func_fabc(struct fabc x) { return x; }
 
+// CHECK: define [4 x float] @func_f2a2b([4 x float] %x.coerce)
+struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
+
 // CHECK-LABEL: @call_f1
 // CHECK: %[[TMP:[^ ]+]] = load float* getelementptr inbounds (%struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
 // CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
Index: test/CodeGenCXX/homogeneous-aggregates.cpp
===================================================================
--- /dev/null
+++ test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC
+// RUN: %clang_cc1 -mfloat-abi hard -triple armv7-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM32
+// RUN: %clang_cc1 -mfloat-abi hard -triple aarch64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64
+
+// Test that C++ classes are correctly classified as homogeneous aggregates.
+ +struct Base1 { + int x; +}; +struct Base2 { + double x; +}; +struct Base3 { + double x; +}; +struct D1 : Base1 { // non-homogeneous aggregate + double y, z; +}; +struct D2 : Base2 { // homogeneous aggregate + double y, z; +}; +struct D3 : Base1, Base2 { // non-homogeneous aggregate + double y, z; +}; +struct D4 : Base2, Base3 { // homogeneous aggregate + double y, z; +}; + +// PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce) +// ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, { [3 x i64] } %x.coerce) +// ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, %struct.D1* %x) +D1 func_D1(D1 x) { return x; } + +// PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce) +// ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce) +// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2) +D2 func_D2(D2 x) { return x; } + +// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce) +// ARM32: define arm_aapcs_vfpcc void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, { [4 x i64] } %x.coerce) +// ARM64: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, %struct.D3* %x) +D3 func_D3(D3 x) { return x; } + +// PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce) +// ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce) +// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3) +D4 func_D4(D4 x) { return x; }