diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
--- a/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -330,6 +330,50 @@
   return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align);
 }
 
+static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
+                              llvm::LLVMContext &VMContext);
+
+/// Try to pass the AAPCS struct \p Ty as one integer argument per field.
+///
+/// AAPCS32 6.5: a Composite Type assigned to core registers behaves as if it
+/// had been stored to memory at a word-aligned address and then loaded into
+/// consecutive registers with a load-multiple instruction.
+///
+/// Only structs with 1-4 plain integer fields (each at most 64 bits, at most
+/// 16 bytes in total), no bit-fields and no C++ base classes qualify.
+/// NOTE(review): padding and over-alignment are not checked -- confirm that
+/// one value per field still matches the in-memory layout for such structs.
+///
+/// \returns true and sets \p *A on success; otherwise leaves \p *A untouched.
+static bool canFlatten(const RecordType *Ty, ASTContext &Ctx,
+                       llvm::LLVMContext &LLVMCtx, ABIArgInfo *A) {
+  const RecordDecl *RD = Ty->getDecl();
+  // Fields inherited from base classes are not visited by fields(), so a
+  // derived class cannot be described by its own field list alone.
+  if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
+    if (CXXRD->getNumBases() != 0)
+      return false;
+
+  llvm::SmallVector<llvm::Type *, 4> ParamTypes;
+  uint64_t TotalSize = 0; // In bits.
+  for (const FieldDecl *FD : RD->fields()) {
+    // A bit-field occupies fewer bits than its declared type.
+    if (FD->isBitField() || !FD->getType()->isIntegerType())
+      return false;
+
+    const uint64_t TypeSize = Ctx.getTypeSize(FD->getType()); // In bits.
+    TotalSize += TypeSize;
+    if (ParamTypes.size() == 4 || TypeSize > 64 || TotalSize > 16 * 8)
+      return false;
+    ParamTypes.push_back(llvm::Type::getIntNTy(LLVMCtx, TypeSize));
+  }
+  if (ParamTypes.empty()) // Nothing to flatten; use the generic coercion.
+    return false;
+
+  *A = ABIArgInfo::getDirect(llvm::StructType::get(LLVMCtx, ParamTypes));
+  return true;
+}
+
 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
                                             unsigned functionCallConv) const {
   // 6.1.2.1 The following argument types are VFP CPRCs:
@@ -427,11 +471,16 @@
     return coerceToIntArray(Ty, getContext(), getVMContext());
   }
 
+  // Try to flatten small structs.
+  if (getABIKind() == ARMABIKind::AAPCS && Ty->isStructureType()) {
+    ABIArgInfo A;
+    if (canFlatten(Ty->getAs<RecordType>(), getContext(), getVMContext(), &A))
+      return A;
+  }
+
   // Otherwise, pass by coercing to a structure of the appropriate size.
   llvm::Type* ElemTy;
   unsigned SizeRegs;
-  // FIXME: Try to match the types of the arguments more accurately where
-  // we can.
   if (TyAlign <= 4) {
     ElemTy = llvm::Type::getInt32Ty(getVMContext());
     SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
diff --git a/clang/test/CodeGen/aapcs-align.cpp b/clang/test/CodeGen/aapcs-align.cpp
--- a/clang/test/CodeGen/aapcs-align.cpp
+++ b/clang/test/CodeGen/aapcs-align.cpp
@@ -19,10 +19,10 @@
   f0m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g0
-// CHECK: call void @f0(i32 noundef 1, [2 x i32] [i32 6, i32 7]
-// CHECK: call void @f0m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i32] [i32 6, i32 7]
-// CHECK: declare void @f0(i32 noundef, [2 x i32])
-// CHECK: declare void @f0m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i32])
+// CHECK: call void @f0(i32 noundef 1, i32 6, i32 7
+// CHECK: call void @f0m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 6, i32 7)
+// CHECK: declare void @f0(i32 noundef, i32, i32)
+// CHECK: declare void @f0m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32, i32)
 
 // Aligned struct, passed according to its natural alignment.
 struct __attribute__((aligned(8))) S8 {
@@ -37,10 +37,10 @@
   f1m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g1
-// CHECK: call void @f1(i32 noundef 1, [2 x i32] [i32 6, i32 7]
-// CHECK: call void @f1m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [2 x i32] [i32 6, i32 7]
-// CHECK: declare void @f1(i32 noundef, [2 x i32])
-// CHECK: declare void @f1m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [2 x i32])
+// CHECK: call void @f1(i32 noundef 1, i32 6, i32 7)
+// CHECK: call void @f1m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 6, i32 7)
+// CHECK: declare void @f1(i32 noundef, i32, i32)
+// CHECK: declare void @f1m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32, i32)
 
 // Aligned struct, passed according to its natural alignment.
 struct alignas(16) S16 {
@@ -56,10 +56,10 @@
   f2m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g2
-// CHECK: call void @f2(i32 noundef 1, [4 x i32] [i32 6, i32 7
-// CHECK: call void @f2m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7
-// CHECK: declare void @f2(i32 noundef, [4 x i32])
-// CHECK: declare void @f2m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [4 x i32])
+// CHECK: call void @f2(i32 noundef 1, i32 6, i32 7)
+// CHECK: call void @f2m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 6, i32 7)
+// CHECK: declare void @f2(i32 noundef, i32, i32)
+// CHECK: declare void @f2m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32, i32)
 
 // Increased natural alignment.
 struct SF8 {
@@ -75,10 +75,10 @@
   f3m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g3
-// CHECK: call void @f3(i32 noundef 1, [1 x i64] [i64 30064771078]
-// CHECK: call void @f3m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [1 x i64] [i64 30064771078]
-// CHECK: declare void @f3(i32 noundef, [1 x i64])
-// CHECK: declare void @f3m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [1 x i64])
+// CHECK: call void @f3(i32 noundef 1, i32 6, i32 7)
+// CHECK: call void @f3m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 6, i32 7)
+// CHECK: declare void @f3(i32 noundef, i32, i32)
+// CHECK: declare void @f3m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32, i32)
 
 // Increased natural alignment, capped to 8 though.
 struct SF16 {
@@ -114,10 +114,10 @@
   f5m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g5
-// CHECK: call void @f5(i32 noundef 1, [3 x i32] [i32 6, i32 7, i32 0])
-// CHECK: call void @f5m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [3 x i32] [i32 6, i32 7, i32 0])
-// CHECK: declare void @f5(i32 noundef, [3 x i32])
-// CHECK: declare void @f5m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [3 x i32])
+// CHECK: call void @f5(i32 noundef 1, i32 6, i64 0)
+// CHECK: call void @f5m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 6, i64 0)
+// CHECK: declare void @f5(i32 noundef, i32, i64)
+// CHECK: declare void @f5m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32, i64)
 
 
 // Packed and aligned, alignement causes padding at the end.
@@ -134,8 +134,8 @@
   f6m(1, 2, 3, 4, 5, s);
 }
 // CHECK: define{{.*}} void @g6
-// CHECK: call void @f6(i32 noundef 1, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
-// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, [4 x i32] [i32 6, i32 7, i32 0, i32 undef])
-// CHECK: declare void @f6(i32 noundef, [4 x i32])
-// CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, [4 x i32])
+// CHECK: call void @f6(i32 noundef 1, i32 6, i64 0)
+// CHECK: call void @f6m(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 6, i64 0)
+// CHECK: declare void @f6(i32 noundef, i32, i64)
+// CHECK: declare void @f6m(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32, i64)
 }
diff --git a/clang/test/CodeGen/arm-arguments.c b/clang/test/CodeGen/arm-arguments.c
--- a/clang/test/CodeGen/arm-arguments.c
+++ b/clang/test/CodeGen/arm-arguments.c
@@ -158,10 +158,10 @@
 // PR11905
 struct s31 { char x; };
 void f31(struct s31 s) { }
-// AAPCS: @f31([1 x i32] %s.coerce)
+// AAPCS: @f31(i8 %s.coerce)
 // AAPCS: %s = alloca %struct.s31, align 1
-// AAPCS: [[TEMP:%.*]] = alloca [1 x i32], align 4
-// AAPCS: store [1 x i32] %s.coerce, ptr [[TEMP]], align 4
+// AAPCS: [[TEMP:%.*]] = getelementptr inbounds %struct.s31, ptr %s, i32 0, i32 0
+// AAPCS: store i8 %s.coerce, ptr [[TEMP]], align 1
 // APCS-GNU: @f31([1 x i32] %s.coerce)
 // APCS-GNU: %s = alloca %struct.s31, align 1
 // APCS-GNU: [[TEMP:%.*]] = alloca [1 x i32], align 4
@@ -184,8 +184,8 @@
 void f34(struct s34 s);
 void g34(struct s34 *s) { f34(*s); }
 // AAPCS: @g34(ptr noundef %s)
-// AAPCS: %[[a:.*]] = alloca [1 x i32]
-// AAPCS: load [1 x i32], ptr %[[a]]
+// AAPCS: %[[a:.*]] = alloca ptr, align 4
+// AAPCS: load ptr, ptr %[[a]]
 
 // rdar://12596507
 struct s35