Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -5321,6 +5321,11 @@
     // ARM has atomics up to 8 bytes
     setAtomic();
 
+    if (Triple.getEnvironment() == llvm::Triple::Android)
+      MaxVectorAlign = 128; // don't break existing Android ABI
+    else
+      MaxVectorAlign = 64; // AAPCS
+
     // Do force alignment of members that follow zero length bitfields.  If
     // the alignment of the zero-length bitfield is greater than the member
     // that follows it, `bar', `bar' will be aligned as the  type of the
Index: test/CodeGen/arm-abi-vector.c
===================================================================
--- test/CodeGen/arm-abi-vector.c
+++ test/CodeGen/arm-abi-vector.c
@@ -133,20 +133,20 @@
 
 double varargs_vec_9c(int fixed, ...) {
 // CHECK: varargs_vec_9c
-// CHECK: [[VAR:%.*]] = alloca <9 x i8>, align 16
+// CHECK: [[VAR:%.*]] = alloca <9 x i8>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <9 x i8>*
 // CHECK: [[T0:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 8
-// CHECK: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 16
+// CHECK: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 8
 // APCS-GNU: varargs_vec_9c
-// APCS-GNU: [[VAR:%.*]] = alloca <9 x i8>, align 16
+// APCS-GNU: [[VAR:%.*]] = alloca <9 x i8>, align 8
 // APCS-GNU: [[AP:%.*]] = load i8*,
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <9 x i8>*
 // APCS-GNU: [[VEC:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 4
-// APCS-GNU: store <9 x i8> [[VEC]], <9 x i8>* [[VAR]], align 16
+// APCS-GNU: store <9 x i8> [[VEC]], <9 x i8>* [[VAR]], align 8
 // ANDROID: varargs_vec_9c
 // ANDROID: [[VAR:%.*]] = alloca <9 x i8>, align 16
 // ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
@@ -246,15 +246,15 @@
 
 double varargs_vec_5s(int fixed, ...) {
 // CHECK: varargs_vec_5s
-// CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
+// CHECK: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 8
 // CHECK: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
 // CHECK: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
 // CHECK: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
 // CHECK: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i16>*
 // CHECK: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 8
-// CHECK: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 16
+// CHECK: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 8
 // APCS-GNU: varargs_vec_5s
-// APCS-GNU: [[VAR:%.*]] = alloca <5 x i16>, align 16
+// APCS-GNU: [[VAR:%.*]] = alloca <5 x i16>, align 8
 // APCS-GNU: [[AP:%.*]] = load i8*,
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i16>*
Index: test/CodeGen/arm-neon-misc.c
===================================================================
--- test/CodeGen/arm-neon-misc.c
+++ test/CodeGen/arm-neon-misc.c
@@ -32,3 +32,11 @@
     *dst = q;
 // CHECK: store <2 x i64>
 }
+
+// Neon types have 64-bit alignment
+int32x4_t gl_b;
+void t3(int32x4_t *src) {
+// CHECK: @t3
+  gl_b = *src;
+// CHECK: store <4 x i32> {{%.*}}, <4 x i32>* @gl_b, align 8
+}
Index: test/CodeGen/arm-swiftcall.c
===================================================================
--- test/CodeGen/arm-swiftcall.c
+++ test/CodeGen/arm-swiftcall.c
@@ -342,7 +342,7 @@
 } union_hom_fp_partial;
 TEST(union_hom_fp_partial)
 // CHECK-LABEL: define void @test_union_hom_fp_partial()
-// CHECK:   [[TMP:%.*]] = alloca [[REC:%.*]], align 16
+// CHECK:   [[TMP:%.*]] = alloca [[REC:%.*]], align 8
 // CHECK:   [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ float, float, float, float }]] @return_union_hom_fp_partial()
 // CHECK:   [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ float, float, float, float }]]*
 // CHECK:   [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0
@@ -375,7 +375,7 @@
 } union_het_fpv_partial;
 TEST(union_het_fpv_partial)
 // CHECK-LABEL: define void @test_union_het_fpv_partial()
-// CHECK:   [[TMP:%.*]] = alloca [[REC:%.*]], align 16
+// CHECK:   [[TMP:%.*]] = alloca [[REC:%.*]], align 8
 // CHECK:   [[CALL:%.*]] = call [[SWIFTCC]] [[UAGG:{ i32, i32, float, float }]] @return_union_het_fpv_partial()
 // CHECK:   [[CAST_TMP:%.*]] = bitcast [[REC]]* [[TMP]] to [[AGG:{ i32, i32, float, float }]]*
 // CHECK:   [[T0:%.*]] = getelementptr inbounds [[AGG]], [[AGG]]* [[CAST_TMP]], i32 0, i32 0
@@ -412,7 +412,7 @@
 
 TEST(int8)
 // CHECK-LABEL: define {{.*}} @return_int8()
-// CHECK:   [[RET:%.*]] = alloca [[REC:<8 x i32>]], align 32
+// CHECK:   [[RET:%.*]] = alloca [[REC:<8 x i32>]], align 8
 // CHECK:   [[VAR:%.*]] = alloca [[REC]], align
 // CHECK:   store
 // CHECK:   load
@@ -456,7 +456,7 @@
 
 TEST(int5)
 // CHECK-LABEL: define {{.*}} @return_int5()
-// CHECK:   [[RET:%.*]] = alloca [[REC:<5 x i32>]], align 32
+// CHECK:   [[RET:%.*]] = alloca [[REC:<5 x i32>]], align 8
 // CHECK:   [[VAR:%.*]] = alloca [[REC]], align
 // CHECK:   store
 // CHECK:   load
Index: test/CodeGen/armv7k-abi.c
===================================================================
--- test/CodeGen/armv7k-abi.c
+++ test/CodeGen/armv7k-abi.c
@@ -83,11 +83,11 @@
 OddlySizedStruct return_oddly_sized_struct() {}
 
 // CHECK: define <4 x float> @test_va_arg_vec(i8* %l)
-// CHECK:   [[ALIGN_TMP:%.*]] = add i32 {{%.*}}, 15
-// CHECK:   [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -16
+// CHECK:   [[ALIGN_TMP:%.*]] = add i32 {{%.*}}, 7
+// CHECK:   [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -8
 // CHECK:   [[ALIGNED_I8:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
 // CHECK:   [[ALIGNED_VEC:%.*]] = bitcast i8* [[ALIGNED_I8]] to <4 x float>
-// CHECK:   load <4 x float>, <4 x float>* [[ALIGNED_VEC]], align 16
+// CHECK:   load <4 x float>, <4 x float>* [[ALIGNED_VEC]], align 8
 float32x4_t test_va_arg_vec(__builtin_va_list l) {
   return __builtin_va_arg(l, float32x4_t);
 }