Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2511,15 +2511,21 @@
     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
     int FI = cast<FrameIndexSDNode>(N)->getIndex();
     SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+    MachineFrameInfo *MFI = MF->getFrameInfo();
     if (Subtarget->isThumb1Only()) {
       // Set the alignment of the frame object to 4, to avoid having to generate
       // more than one ADD
-      MachineFrameInfo *MFI = MF->getFrameInfo();
       if (MFI->getObjectAlignment(FI) < 4)
         MFI->setObjectAlignment(FI, 4);
       return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                                   CurDAG->getTargetConstant(0, MVT::i32));
     } else {
+      // Arrays (or other objects) whose address leaks into another function
+      // may end up being memcpy'd there. memcpy typically tries to use LDM/STM
+      // if the source/dest is aligned and the copy size is large enough.
+      // Therefore give frame objects of 8+ bytes at least 4-byte alignment.
+      if (MFI->getObjectSize(FI) >= 8 && MFI->getObjectAlignment(FI) < 4)
+        MFI->setObjectAlignment(FI, 4);
       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                       ARM::t2ADDri : ARM::ADDri);
       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
Index: test/CodeGen/ARM/stack-object-align.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/stack-object-align.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=arm-eabi < %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv7-eabi < %s -o - | FileCheck %s
+
+; Expect small (3-byte) arrays to stay unaligned even though their address escapes
+define void @test1() {
+entry:
+  %arr1 = alloca [3 x i8], align 1
+  %arr2 = alloca [3 x i8], align 1
+; CHECK-LABEL: test1:
+; CHECK: add{{(\.w)?}} r0, sp, #5
+  %arraydecay = getelementptr inbounds [3 x i8]* %arr1, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay)
+
+; CHECK: add{{(\.w)?}} r0, sp, #2
+  %arraydecay1 = getelementptr inbounds [3 x i8]* %arr2, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay1)
+
+  ret void
+}
+
+; Expect larger (9-byte) arrays whose address escapes to be placed 4-byte aligned
+define void @test2() {
+entry:
+  %arr1 = alloca [9 x i8], align 1
+  %arr2 = alloca [9 x i8], align 1
+; CHECK-LABEL: test2:
+; CHECK: add{{(\.w)?}} r0, sp, #12
+  %arraydecay = getelementptr inbounds [9 x i8]* %arr1, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay)
+
+; CHECK: mov r0, sp
+  %arraydecay1 = getelementptr inbounds [9 x i8]* %arr2, i32 0, i32 0
+; CHECK: bl takeptr
+  call void @takeptr(i8* %arraydecay1)
+
+  ret void
+}
+
+; Expect larger arrays that are only stored to directly (no escape) to stay unaligned
+define void @test3() {
+entry:
+  %arr1 = alloca [9 x i8], align 1
+  %arr2 = alloca [9 x i8], align 1
+; CHECK-LABEL: test3:
+; CHECK: strb{{(\.w)?}} {{r[0-9]+}}, [sp, #11]
+  %arrayidx = getelementptr inbounds [9 x i8]* %arr1, i32 0, i32 0
+  store i8 1, i8* %arrayidx, align 1
+
+; CHECK: strb{{(\.w)?}} {{r[0-9]+}}, [sp, #2]
+  %arrayidx1 = getelementptr inbounds [9 x i8]* %arr2, i32 0, i32 0
+  store i8 1, i8* %arrayidx1, align 1
+
+  ret void
+}
+
+declare void @takeptr(i8*)