Index: cfe/trunk/lib/CodeGen/TargetInfo.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp
@@ -4730,6 +4730,11 @@
     }
   }
 
+  bool isAndroid() const {
+    return (getTarget().getTriple().getEnvironment() ==
+            llvm::Triple::Android);
+  }
+
   ABIKind getABIKind() const { return Kind; }
 
 private:
@@ -5233,15 +5238,27 @@
 
 /// isIllegalVector - check whether Ty is an illegal vector type.
 bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
-  if (const VectorType *VT = Ty->getAs<VectorType>()) {
-    // Check whether VT is legal.
-    unsigned NumElements = VT->getNumElements();
-    uint64_t Size = getContext().getTypeSize(VT);
-    // NumElements should be power of 2.
-    if ((NumElements & (NumElements - 1)) != 0)
-      return true;
-    // Size should be greater than 32 bits.
-    return Size <= 32;
+  if (const VectorType *VT = Ty->getAs<VectorType> ()) {
+    if (isAndroid()) {
+      // Android shipped using Clang 3.1, which supported a slightly different
+      // vector ABI. The primary differences were that 3-element vector types
+      // were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path
+      // accepts that legacy behavior for Android only.
+      // Check whether VT is legal.
+      unsigned NumElements = VT->getNumElements();
+      // NumElements should be power of 2 or equal to 3.
+      if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
+        return true;
+    } else {
+      // Check whether VT is legal.
+      unsigned NumElements = VT->getNumElements();
+      uint64_t Size = getContext().getTypeSize(VT);
+      // NumElements should be power of 2.
+      if (!llvm::isPowerOf2_32(NumElements))
+        return true;
+      // Size should be greater than 32 bits.
+      return Size <= 32;
+    }
   }
   return false;
 }
Index: cfe/trunk/test/CodeGen/arm-abi-vector.c
===================================================================
--- cfe/trunk/test/CodeGen/arm-abi-vector.c
+++ cfe/trunk/test/CodeGen/arm-abi-vector.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi aapcs -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck -check-prefix=APCS-GNU %s
+// RUN: %clang_cc1 -triple arm-linux-androideabi -emit-llvm -o - %s | FileCheck -check-prefix=ANDROID %s
 
 #include <stdarg.h>
 
@@ -28,6 +29,14 @@
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <2 x i32>*
 // APCS-GNU: [[VEC:%.*]] = load <2 x i32>, <2 x i32>* [[AP_CAST]], align 4
 // APCS-GNU: store <2 x i32> [[VEC]], <2 x i32>* [[VAR]], align 8
+// ANDROID: varargs_vec_2i
+// ANDROID: [[VAR:%.*]] = alloca <2 x i32>, align 8
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <2 x i32>*
+// ANDROID: [[VEC:%.*]] = load <2 x i32>, <2 x i32>* [[AP_CAST]], align 8
+// ANDROID: store <2 x i32> [[VEC]], <2 x i32>* [[VAR]], align 8
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -42,6 +51,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_2i(i32 3, <2 x i32> {{%.*}})
 // APCS-GNU: test_2i
 // APCS-GNU: call double (i32, ...) @varargs_vec_2i(i32 3, <2 x i32> {{%.*}})
+// ANDROID: test_2i
+// ANDROID: call double (i32, ...) @varargs_vec_2i(i32 3, <2 x i32> {{%.*}})
   return varargs_vec_2i(3, *in);
 }
 
@@ -54,6 +65,10 @@
 // APCS-GNU: alloca <3 x i8>, align 4
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
 // APCS-GNU: bitcast i8* [[AP]] to <3 x i8>*
+// ANDROID: varargs_vec_3c
+// ANDROID: alloca <3 x i8>, align 4
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
+// ANDROID: bitcast i8* [[AP]] to <3 x i8>*
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -68,6 +83,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_3c(i32 3, i32 {{%.*}})
 // APCS-GNU: test_3c
 // APCS-GNU: call double (i32, ...) @varargs_vec_3c(i32 3, i32 {{%.*}})
+// ANDROID: test_3c
+// ANDROID: call double (i32, ...) @varargs_vec_3c(i32 3, <3 x i8> {{%.*}})
   return varargs_vec_3c(3, *in);
 }
 
@@ -87,6 +104,14 @@
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i8>*
 // APCS-GNU: [[VEC:%.*]] = load <5 x i8>, <5 x i8>* [[AP_CAST]], align 4
 // APCS-GNU: store <5 x i8> [[VEC]], <5 x i8>* [[VAR]], align 8
+// ANDROID: varargs_vec_5c
+// ANDROID: [[VAR:%.*]] = alloca <5 x i8>, align 8
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i8>*
+// ANDROID: [[VEC:%.*]] = load <5 x i8>, <5 x i8>* [[AP_CAST]], align 8
+// ANDROID: store <5 x i8> [[VEC]], <5 x i8>* [[VAR]], align 8
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -101,6 +126,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_5c(i32 5, <2 x i32> {{%.*}})
 // APCS-GNU: test_5c
 // APCS-GNU: call double (i32, ...) @varargs_vec_5c(i32 5, <2 x i32> {{%.*}})
+// ANDROID: test_5c
+// ANDROID: call double (i32, ...) @varargs_vec_5c(i32 5, <2 x i32> {{%.*}})
   return varargs_vec_5c(5, *in);
 }
 
@@ -120,6 +147,14 @@
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <9 x i8>*
 // APCS-GNU: [[VEC:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 4
 // APCS-GNU: store <9 x i8> [[VEC]], <9 x i8>* [[VAR]], align 16
+// ANDROID: varargs_vec_9c
+// ANDROID: [[VAR:%.*]] = alloca <9 x i8>, align 16
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <9 x i8>*
+// ANDROID: [[T0:%.*]] = load <9 x i8>, <9 x i8>* [[AP_CAST]], align 8
+// ANDROID: store <9 x i8> [[T0]], <9 x i8>* [[VAR]], align 16
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -134,6 +169,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_9c(i32 9, <4 x i32> {{%.*}})
 // APCS-GNU: test_9c
 // APCS-GNU: call double (i32, ...) @varargs_vec_9c(i32 9, <4 x i32> {{%.*}})
+// ANDROID: test_9c
+// ANDROID: call double (i32, ...) @varargs_vec_9c(i32 9, <4 x i32> {{%.*}})
   return varargs_vec_9c(9, *in);
 }
 
@@ -146,6 +183,10 @@
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
 // APCS-GNU: [[VAR:%.*]] = bitcast i8* [[AP]] to <19 x i8>**
 // APCS-GNU: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
+// ANDROID: varargs_vec_19c
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP:%.*]], i32 4
+// ANDROID: [[VAR:%.*]] = bitcast i8* [[AP]] to <19 x i8>**
+// ANDROID: [[VAR2:%.*]] = load <19 x i8>*, <19 x i8>** [[VAR]]
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -160,6 +201,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_19c(i32 19, <19 x i8>* {{%.*}})
 // APCS-GNU: test_19c
 // APCS-GNU: call double (i32, ...) @varargs_vec_19c(i32 19, <19 x i8>* {{%.*}})
+// ANDROID: test_19c
+// ANDROID: call double (i32, ...) @varargs_vec_19c(i32 19, <19 x i8>* {{%.*}})
   return varargs_vec_19c(19, *in);
 }
 
@@ -176,6 +219,12 @@
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 8
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <3 x i16>*
 // APCS-GNU: [[VEC:%.*]] = load <3 x i16>, <3 x i16>* [[AP_CAST]], align 4
+// ANDROID: varargs_vec_3s
+// ANDROID: alloca <3 x i16>, align 8
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 8
+// ANDROID: bitcast i8* [[AP_ALIGN]] to <3 x i16>*
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -190,6 +239,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_3s(i32 3, <2 x i32> {{%.*}})
 // APCS-GNU: test_3s
 // APCS-GNU: call double (i32, ...) @varargs_vec_3s(i32 3, <2 x i32> {{%.*}})
+// ANDROID: test_3s
+// ANDROID: call double (i32, ...) @varargs_vec_3s(i32 3, <3 x i16> {{%.*}})
   return varargs_vec_3s(3, *in);
 }
 
@@ -208,6 +259,14 @@
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP]], i32 16
 // APCS-GNU: [[AP_CAST:%.*]] = bitcast i8* [[AP]] to <5 x i16>*
 // APCS-GNU: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 4
+// ANDROID: varargs_vec_5s
+// ANDROID: [[VAR_ALIGN:%.*]] = alloca <5 x i16>, align 16
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// ANDROID: [[AP_CAST:%.*]] = bitcast i8* [[AP_ALIGN]] to <5 x i16>*
+// ANDROID: [[VEC:%.*]] = load <5 x i16>, <5 x i16>* [[AP_CAST]], align 8
+// ANDROID: store <5 x i16> [[VEC]], <5 x i16>* [[VAR_ALIGN]], align 16
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -222,6 +281,8 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_vec_5s(i32 5, <4 x i32> {{%.*}})
 // APCS-GNU: test_5s
 // APCS-GNU: call double (i32, ...) @varargs_vec_5s(i32 5, <4 x i32> {{%.*}})
+// ANDROID: test_5s
+// ANDROID: call double (i32, ...) @varargs_vec_5s(i32 5, <4 x i32> {{%.*}})
   return varargs_vec_5s(5, *in);
 }
 
@@ -243,6 +304,11 @@
 // APCS-GNU: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* {{%.*}}, i32 16
 // APCS-GNU: bitcast %struct.StructWithVec* [[VAR_ALIGN]] to i8*
 // APCS-GNU: call void @llvm.memcpy
+// ANDROID: varargs_struct
+// ANDROID: [[ALIGN:%.*]] = and i32 {{%.*}}, -8
+// ANDROID: [[AP_ALIGN:%.*]] = inttoptr i32 [[ALIGN]] to i8*
+// ANDROID: [[AP_NEXT:%.*]] = getelementptr inbounds i8, i8* [[AP_ALIGN]], i32 16
+// ANDROID: bitcast i8* [[AP_ALIGN]] to %struct.StructWithVec*
   va_list ap;
   double sum = fixed;
   va_start(ap, fixed);
@@ -257,5 +323,7 @@
 // CHECK: call arm_aapcscc double (i32, ...) @varargs_struct(i32 3, [2 x i64] {{%.*}})
 // APCS-GNU: test_struct
 // APCS-GNU: call double (i32, ...) @varargs_struct(i32 3, [2 x i64] {{%.*}})
+// ANDROID: test_struct
+// ANDROID: call double (i32, ...) @varargs_struct(i32 3, [2 x i64] {{%.*}})
   return varargs_struct(3, *d);
 }