Index: include/clang/CodeGen/CGFunctionInfo.h
===================================================================
--- include/clang/CodeGen/CGFunctionInfo.h
+++ include/clang/CodeGen/CGFunctionInfo.h
@@ -87,6 +87,7 @@
   bool IndirectRealign : 1; // isIndirect()
   bool SRetAfterThis : 1;   // isIndirect()
   bool InReg : 1;           // isDirect() || isExtend() || isIndirect()
+  bool CanBeFlattened: 1;   // isDirect()
 
   ABIArgInfo(Kind K)
       : PaddingType(nullptr), TheKind(K), PaddingInReg(false), InReg(false) {}
@@ -97,11 +98,13 @@
         TheKind(Direct), PaddingInReg(false), InReg(false) {}
 
   static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
-                              llvm::Type *Padding = nullptr) {
+                              llvm::Type *Padding = nullptr,
+                              bool CanBeFlattened = true) {
     auto AI = ABIArgInfo(Direct);
     AI.setCoerceToType(T);
     AI.setDirectOffset(Offset);
     AI.setPaddingType(Padding);
+    AI.setCanBeFlattened(CanBeFlattened);
     return AI;
   }
   static ABIArgInfo getDirectInReg(llvm::Type *T = nullptr) {
@@ -265,6 +268,16 @@
     InAllocaSRet = SRet;
   }
 
+  bool getCanBeFlattened() const {
+    assert(isDirect() && "Invalid kind!");
+    return CanBeFlattened;
+  }
+
+  void setCanBeFlattened(bool Flatten) {
+    assert(isDirect() && "Invalid kind!");
+    CanBeFlattened = Flatten;
+  }
+
   void dump() const;
 };
 
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -139,23 +139,6 @@
   return CC_C;
 }
 
-static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) {
-  switch (FI.getEffectiveCallingConvention()) {
-  case llvm::CallingConv::C:
-    switch (Target.getTriple().getEnvironment()) {
-    case llvm::Triple::EABIHF:
-    case llvm::Triple::GNUEABIHF:
-      return true;
-    default:
-      return false;
-    }
-  case llvm::CallingConv::ARM_AAPCS_VFP:
-    return true;
-  default:
-    return false;
-  }
-}
-
 /// Arrange the argument and result information for a call to an
 /// unknown C++ non-static member function of the given abstract type.
 /// (Zero value of RD means we don't have any meaningful "this" argument type,
@@ -992,11 +975,9 @@
       // If the coerce-to type is a first class aggregate, flatten it.  Either
       // way is semantically identical, but fast-isel and the optimizer
       // generally likes scalar values better than FCAs.
-      // We cannot do this for functions using the AAPCS calling convention,
-      // as structures are treated differently by that calling convention.
       llvm::Type *argType = argAI.getCoerceToType();
       llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
-      if (st && !isAAPCSVFP(FI, getTarget())) {
+      if (st && argAI.isDirect() && argAI.getCanBeFlattened()) {
         for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
           argTypes.push_back(st->getElementType(i));
       } else {
@@ -1134,7 +1115,7 @@
     case ABIArgInfo::Direct: {
       // FIXME: handle sseregparm someday...
       llvm::StructType *STy = dyn_cast<llvm::StructType>(AI.getCoerceToType());
-      if (!isAAPCSVFP(FI, CGM.getTarget()) && STy) {
+      if (AI.isDirect() && AI.getCanBeFlattened() && STy) {
         IRArgs.NumberOfArgs = STy->getNumElements();
       } else {
         IRArgs.NumberOfArgs = 1;
@@ -1674,10 +1655,9 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
-      // We cannot do this for functions using the AAPCS calling convention,
-      // as structures are treated differently by that calling convention.
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
-      if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) {
+      if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
+          STy->getNumElements() > 1) {
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
         llvm::Type *DstTy =
           cast<llvm::PointerType>(Ptr->getType())->getElementType();
@@ -2932,11 +2912,9 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
-      // We cannot do this for functions using the AAPCS calling convention,
-      // as structures are treated differently by that calling convention.
       llvm::StructType *STy =
             dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
-      if (STy && !isAAPCSVFP(CallInfo, getTarget())) {
+      if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) {
         llvm::Type *SrcTy =
           cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -4166,6 +4166,9 @@
   // unallocated are marked as unavailable. 
   resetAllocatedRegs();
 
+  const bool isAAPCS_VFP =
+      getABIKind() == ARMABIInfo::AAPCS_VFP && !FI.isVariadic();
+
   if (getCXXABI().classifyReturnType(FI)) {
     if (FI.getReturnInfo().isIndirect())
       markAllocatedGPRs(1, 1);
@@ -4196,10 +4199,10 @@
           llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs);
       if (I.info.canHaveCoerceToType()) {
         I.info = ABIArgInfo::getDirect(I.info.getCoerceToType() /* type */, 0 /* offset */,
-                                       PaddingTy);
+                                       PaddingTy, !isAAPCS_VFP);
       } else {
         I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */,
-                                       PaddingTy);
+                                       PaddingTy, !isAAPCS_VFP);
       }
     }
   }
@@ -4395,6 +4398,9 @@
   //   64-bit containerized vectors or 128-bit containerized vectors with one
   //   to four Elements.
 
+  const bool isAAPCS_VFP =
+      getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic;
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty)) {
     uint64_t Size = getContext().getTypeSize(Ty);
@@ -4402,7 +4408,7 @@
       llvm::Type *ResType =
           llvm::Type::getInt32Ty(getVMContext());
       markAllocatedGPRs(1, 1);
-      return ABIArgInfo::getDirect(ResType);
+      return ABIArgInfo::getDirect(ResType, 0, nullptr, !isAAPCS_VFP);
     }
     if (Size == 64) {
       llvm::Type *ResType = llvm::VectorType::get(
@@ -4413,7 +4419,7 @@
         markAllocatedVFPs(2, 2);
         IsCPRC = true;
       }
-      return ABIArgInfo::getDirect(ResType);
+      return ABIArgInfo::getDirect(ResType, 0, nullptr, !isAAPCS_VFP);
     }
     if (Size == 128) {
       llvm::Type *ResType = llvm::VectorType::get(
@@ -4424,7 +4430,7 @@
         markAllocatedVFPs(4, 4);
         IsCPRC = true;
       }
-      return ABIArgInfo::getDirect(ResType);
+      return ABIArgInfo::getDirect(ResType, 0, nullptr, !isAAPCS_VFP);
     }
     markAllocatedGPRs(1, 1);
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
@@ -4463,8 +4469,9 @@
     unsigned Size = getContext().getTypeSize(Ty);
     if (!IsCPRC)
       markAllocatedGPRs(Size > 32 ? 2 : 1, (Size + 31) / 32);
-    return (Ty->isPromotableIntegerType() ?
-            ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    return (Ty->isPromotableIntegerType()
+                ? ABIArgInfo::getExtend()
+                : ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP));
   }
 
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
@@ -4476,7 +4483,7 @@
   if (isEmptyRecord(getContext(), Ty, true))
     return ABIArgInfo::getIgnore();
 
-  if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
+  if (isAAPCS_VFP) {
     // Homogeneous Aggregates need to be expanded when we can fit the aggregate
     // into VFP registers.
     const Type *Base = nullptr;
@@ -4497,7 +4504,7 @@
         markAllocatedVFPs(2, Members * 2);
       }
       IsCPRC = true;
-      return ABIArgInfo::getDirect();
+      return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP);
     }
   }
 
@@ -4537,7 +4544,7 @@
 
   llvm::Type *STy =
     llvm::StructType::get(llvm::ArrayType::get(ElemTy, SizeRegs), NULL);
-  return ABIArgInfo::getDirect(STy);
+  return ABIArgInfo::getDirect(STy, 0, nullptr, !isAAPCS_VFP);
 }
 
 static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
@@ -4627,6 +4634,9 @@
 
 ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
                                           bool isVariadic) const {
+  const bool isAAPCS_VFP =
+      getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic;
+
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
 
@@ -4641,8 +4651,9 @@
     if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
       RetTy = EnumTy->getDecl()->getIntegerType();
 
-    return (RetTy->isPromotableIntegerType() ?
-            ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
+    return (RetTy->isPromotableIntegerType()
+                ? ABIArgInfo::getExtend()
+                : ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP));
   }
 
   // Are we following APCS?
@@ -4655,8 +4666,8 @@
     // FIXME: Consider using 2 x vector types if the back end handles them
     // correctly.
     if (RetTy->isAnyComplexType())
-      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
-                                              getContext().getTypeSize(RetTy)));
+      return ABIArgInfo::getDirect(llvm::IntegerType::get(
+          getVMContext(), getContext().getTypeSize(RetTy)));
 
     // Integer like structures are returned in r0.
     if (isIntegerLikeType(RetTy, getContext(), getVMContext())) {
@@ -4685,7 +4696,7 @@
     if (isHomogeneousAggregate(RetTy, Base, getContext())) {
       assert(Base && "Base class should be set for homogeneous aggregate");
       // Homogeneous Aggregates are returned directly.
-      return ABIArgInfo::getDirect();
+      return ABIArgInfo::getDirect(nullptr, 0, nullptr, !isAAPCS_VFP);
     }
   }
 
@@ -4695,14 +4706,15 @@
   if (Size <= 32) {
     if (getDataLayout().isBigEndian())
       // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4)
-      return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+      return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()), 0,
+                                   nullptr, !isAAPCS_VFP);
 
     // Return in the smallest viable integer type.
     if (Size <= 8)
-      return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
+      return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()), 0, nullptr, !isAAPCS_VFP);
     if (Size <= 16)
-      return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
-    return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
+      return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()), 0, nullptr, !isAAPCS_VFP);
+    return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()), 0, nullptr, !isAAPCS_VFP);
   }
 
   markAllocatedGPRs(1, 1);
Index: test/CodeGen/arm-homogenous.c
===================================================================
--- test/CodeGen/arm-homogenous.c
+++ test/CodeGen/arm-homogenous.c
@@ -186,7 +186,7 @@
 
 void test_struct_of_four_doubles_variadic(void) {
 // CHECK: test_struct_of_four_doubles_variadic
-// CHECK: call arm_aapcs_vfpcc void (double, { [4 x i64] }, { [4 x i64] }, double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, { [4 x i64] } {{.*}}, { [4 x i64] } {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void (double, [4 x i64], [4 x i64], double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, [4 x i64] {{.*}}, [4 x i64] {{.*}}, double {{.*}})
   takes_struct_of_four_doubles_variadic(3.0, g_s4d, g_s4d, 4.0);
 }