Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -158,6 +158,23 @@
   return CC_C;
 }
 
+static bool isAAPCSVFP(const CGFunctionInfo &FI, const TargetInfo &Target) {
+  switch (FI.getEffectiveCallingConvention()) {
+  case llvm::CallingConv::C:
+    switch (Target.getTriple().getEnvironment()) {
+    case llvm::Triple::EABIHF:
+    case llvm::Triple::GNUEABIHF:
+      return true;
+    default:
+      return false;
+    }
+  case llvm::CallingConv::ARM_AAPCS_VFP:
+    return true;
+  default:
+    return false;
+  }
+}
+
 /// Arrange the argument and result information for a call to an
 /// unknown C++ non-static member function of the given abstract type.
 /// (Zero value of RD means we don't have any meaningful "this" argument type,
@@ -990,8 +1007,11 @@
       // If the coerce-to type is a first class aggregate, flatten it.  Either
       // way is semantically identical, but fast-isel and the optimizer
       // generally likes scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::Type *argType = argAI.getCoerceToType();
-      if (llvm::StructType *st = dyn_cast<llvm::StructType>(argType)) {
+      llvm::StructType *st = dyn_cast<llvm::StructType>(argType);
+      if (st && !isAAPCSVFP(FI, getTarget())) {
         for (unsigned i = 0, e = st->getNumElements(); i != e; ++i)
           argTypes.push_back(st->getElementType(i));
       } else {
@@ -1193,14 +1213,15 @@
       else if (ParamType->isUnsignedIntegerOrEnumerationType())
         Attrs.addAttribute(llvm::Attribute::ZExt);
       // FALL THROUGH
-    case ABIArgInfo::Direct:
+    case ABIArgInfo::Direct: {
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
 
       // FIXME: handle sseregparm someday...
 
-      if (llvm::StructType *STy =
-          dyn_cast<llvm::StructType>(AI.getCoerceToType())) {
+      llvm::StructType *STy =
+          dyn_cast<llvm::StructType>(AI.getCoerceToType());
+      if (!isAAPCSVFP(FI, getTarget()) && STy) {
         unsigned Extra = STy->getNumElements()-1;  // 1 will be added below.
         if (Attrs.hasAttributes())
           for (unsigned I = 0; I < Extra; ++I)
@@ -1209,7 +1230,7 @@
         Index += Extra;
       }
       break;
-
+    }
     case ABIArgInfo::Indirect:
       if (AI.getInReg())
         Attrs.addAttribute(llvm::Attribute::InReg);
@@ -1468,8 +1489,10 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
-      if (STy && STy->getNumElements() > 1) {
+      if (!isAAPCSVFP(FI, getTarget()) && STy && STy->getNumElements() > 1) {
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
         llvm::Type *DstTy =
           cast<llvm::PointerType>(Ptr->getType())->getElementType();
@@ -2705,8 +2728,11 @@
       // If the coerce-to type is a first class aggregate, we flatten it and
       // pass the elements. Either way is semantically identical, but fast-isel
       // and the optimizer generally likes scalar values better than FCAs.
-      if (llvm::StructType *STy =
-            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
+      // We cannot do this for functions using the AAPCS calling convention,
+      // as structures are treated differently by that calling convention.
+      llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType());
+      if (STy && !isAAPCSVFP(CallInfo, getTarget())) {
         llvm::Type *SrcTy =
           cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
         uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy);
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -3199,7 +3199,7 @@
 
 private:
   ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool &IsHA, bool isVariadic,
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
                                   bool &IsCPRC) const;
   bool isIllegalVectorType(QualType Ty) const;
 
@@ -3305,22 +3305,10 @@
        it != ie; ++it) {
     unsigned PreAllocationVFPs = AllocatedVFPs;
     unsigned PreAllocationGPRs = AllocatedGPRs;
-    bool IsHA = false;
     bool IsCPRC = false;
     // 6.1.2.3 There is one VFP co-processor register class using registers
     // s0-s15 (d0-d7) for passing arguments.
-    it->info = classifyArgumentType(it->type, IsHA, FI.isVariadic(), IsCPRC);
-    assert((IsCPRC || !IsHA) && "Homogeneous aggregates must be CPRCs");
-    // If we do not have enough VFP registers for the HA, any VFP registers
-    // that are unallocated are marked as unavailable. To achieve this, we add
-    // padding of (NumVFPs - PreAllocationVFP) floats.
-    // Note that IsHA will only be set when using the AAPCS-VFP calling convention,
-    // and the callee is not variadic.
-    if (IsHA && AllocatedVFPs > NumVFPs && PreAllocationVFPs < NumVFPs) {
-      llvm::Type *PaddingTy = llvm::ArrayType::get(
-          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocationVFPs);
-      it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
-    }
+    it->info = classifyArgumentType(it->type, FI.isVariadic(), IsCPRC);
 
     // If we have allocated some arguments onto the stack (due to running
     // out of VFP registers), we cannot split an argument between GPRs and
@@ -3517,8 +3505,7 @@
     VFPRegs[i] = 0;
 }
 
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool &IsHA,
-                                            bool isVariadic,
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
                                             bool &IsCPRC) const {
   // We update number of allocated VFPs according to
   // 6.1.2.1 The following argument types are VFP CPRCs:
@@ -3630,9 +3617,8 @@
                Base->isSpecificBuiltinType(BuiltinType::LongDouble));
         markAllocatedVFPs(2, Members * 2);
       }
-      IsHA = true;
       IsCPRC = true;
-      return ABIArgInfo::getExpand();
+      return ABIArgInfo::getDirect();
     }
   }
 
Index: test/CodeGen/arm-aapcs-vfp.c
===================================================================
--- test/CodeGen/arm-aapcs-vfp.c
+++ test/CodeGen/arm-aapcs-vfp.c
@@ -19,13 +19,13 @@
   float f3;
   float f4;
 };
-// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
 extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
 struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
   return struct_callee(arg);
 }
 
-// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, [4 x i32] %{{.*}}, ...)
+// CHECK: define arm_aapcs_vfpcc void @test_struct_variadic(%struct.homogeneous_struct* {{.*}}, ...)
 struct homogeneous_struct test_struct_variadic(struct homogeneous_struct arg, ...) {
   return struct_callee(arg);
 }
@@ -33,21 +33,21 @@
 struct nested_array {
   double d[4];
 };
-// CHECK: define arm_aapcs_vfpcc void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
 extern void array_callee(struct nested_array);
 void test_array(struct nested_array arg) {
   array_callee(arg);
 }
 
 extern void complex_callee(__complex__ double);
-// CHECK: define arm_aapcs_vfpcc void @test_complex(double %{{.*}}, double %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}})
 void test_complex(__complex__ double cd) {
   complex_callee(cd);
 }
 
 // Long double is the same as double on AAPCS, it should be homogeneous.
 extern void complex_long_callee(__complex__ long double);
-// CHECK: define arm_aapcs_vfpcc void @test_complex_long(double %{{.*}}, double %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_complex_long({ double, double } %{{.*}})
 void test_complex_long(__complex__ long double cd) {
   complex_callee(cd);
 }
@@ -61,7 +61,7 @@
   float f3;
   float f4;
 };
-// CHECK: define arm_aapcs_vfpcc void @test_big([5 x i32] %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_big({ [5 x i32] } %{{.*}})
 extern void big_callee(struct big_struct);
 void test_big(struct big_struct arg) {
   big_callee(arg);
@@ -74,14 +74,14 @@
   float f1;
   int i2;
 };
-// CHECK: define arm_aapcs_vfpcc void @test_hetero([2 x i32] %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_hetero({ [2 x i32] } %{{.*}})
 extern void hetero_callee(struct heterogeneous_struct);
 void test_hetero(struct heterogeneous_struct arg) {
   hetero_callee(arg);
 }
 
 // Neon multi-vector types are homogeneous aggregates.
-// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
 int8x16_t f0(int8x16x4_t v4) {
   return vaddq_s8(v4.val[0], v4.val[3]);
 }
@@ -94,7 +94,7 @@
   int32x2_t v3;
   int16x4_t v4;
 };
-// CHECK: define arm_aapcs_vfpcc void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+// CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
 extern void neon_callee(struct neon_struct);
 void test_neon(struct neon_struct arg) {
   neon_callee(arg);
Index: test/CodeGen/arm-homogenous.c
===================================================================
--- test/CodeGen/arm-homogenous.c
+++ test/CodeGen/arm-homogenous.c
@@ -17,7 +17,7 @@
 void test_union_with_first_floats(void) {
   takes_union_with_first_floats(g_u_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats([4 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_first_floats({ [4 x i32] })
 
 void test_return_union_with_first_floats(void) {
   g_u_f = returns_union_with_first_floats();
@@ -37,7 +37,7 @@
 void test_union_with_non_first_floats(void) {
   takes_union_with_non_first_floats(g_u_nf_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats([4 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_non_first_floats({ [4 x i32] })
 
 void test_return_union_with_non_first_floats(void) {
   g_u_nf_f = returns_union_with_non_first_floats();
@@ -57,7 +57,7 @@
 void test_struct_with_union_with_first_floats(void) {
   takes_struct_with_union_with_first_floats(g_s_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats([5 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_first_floats({ [5 x i32] })
 
 void test_return_struct_with_union_with_first_floats(void) {
   g_s_f = returns_struct_with_union_with_first_floats();
@@ -77,7 +77,7 @@
 void test_struct_with_union_with_non_first_floats(void) {
   takes_struct_with_union_with_non_first_floats(g_s_nf_f);
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats([5 x i32])
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_union_with_non_first_floats({ [5 x i32] })
 
 void test_return_struct_with_union_with_non_first_floats(void) {
   g_s_nf_f = returns_struct_with_union_with_non_first_floats();
@@ -103,9 +103,9 @@
 
 void test_struct_with_fundamental_elems(void) {
   takes_struct_with_fundamental_elems(g_s);
-// CHECK:  call arm_aapcs_vfpcc  void @takes_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float{{.*}}, float {{.*}})
+// CHECK:  call arm_aapcs_vfpcc  void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems {{.*}})
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(float, float, float, float)
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_fundamental_elems(%struct.struct_with_fundamental_elems)
 
 void test_return_struct_with_fundamental_elems(void) {
   g_s = returns_struct_with_fundamental_elems();
@@ -124,9 +124,9 @@
 
 void test_struct_with_array(void) {
   takes_struct_with_array(g_s_a);
-// CHECK:   call arm_aapcs_vfpcc  void @takes_struct_with_array(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}})
+// CHECK:   call arm_aapcs_vfpcc  void @takes_struct_with_array(%struct.struct_with_array {{.*}})
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(float, float, float, float)
+// CHECK: declare arm_aapcs_vfpcc void @takes_struct_with_array(%struct.struct_with_array)
 
 void test_return_struct_with_array(void) {
   g_s_a = returns_struct_with_array();
@@ -146,9 +146,9 @@
 
 void test_union_with_struct_with_fundamental_elems(void) {
   takes_union_with_struct_with_fundamental_elems(g_u_s_fe);
-// CHECK: call arm_aapcs_vfpcc  void @takes_union_with_struct_with_fundamental_elems(float {{.*}}, float {{.*}}, float {{.*}}, float {{.*}})
+// CHECK: call arm_aapcs_vfpcc  void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems {{.*}})
 }
-// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(float, float, float, float)
+// CHECK: declare arm_aapcs_vfpcc void @takes_union_with_struct_with_fundamental_elems(%union.union_with_struct_with_fundamental_elems)
 
 void test_return_union_with_struct_with_fundamental_elems(void) {
   g_u_s_fe = returns_union_with_struct_with_fundamental_elems();
@@ -169,7 +169,7 @@
 
 void test_struct_of_four_doubles(void) {
 // CHECK: test_struct_of_four_doubles
-// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
   takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
 }
 
@@ -177,14 +177,14 @@
 
 void test_struct_of_four_doubles_variadic(void) {
 // CHECK: test_struct_of_four_doubles_variadic
-// CHECK: call arm_aapcs_vfpcc void (double, [4 x i64], [4 x i64], double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, [4 x i64] {{.*}}, [4 x i64] {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void (double, { [4 x i64] }, { [4 x i64] }, double, ...)* @takes_struct_of_four_doubles_variadic(double {{.*}}, { [4 x i64] } {{.*}}, { [4 x i64] } {{.*}}, double {{.*}})
   takes_struct_of_four_doubles_variadic(3.0, g_s4d, g_s4d, 4.0);
 }
 
 extern void takes_struct_with_backfill(float f1, double a, float f2, struct_of_four_doubles b, struct_of_four_doubles c, double d);
 void test_struct_with_backfill(void) {
 // CHECK: test_struct_with_backfill
-// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [4 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_with_backfill(float {{.*}}, double {{.*}}, float {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
   takes_struct_with_backfill(3.0, 3.1, 3.2, g_s4d, g_s4d, 4.0);
 }
 
@@ -201,7 +201,7 @@
 
 void test_struct_of_vecs(void) {
 // CHECK: test_struct_of_vecs
-// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}})
   takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
 }