Index: clang/lib/Basic/Targets/X86.h
===================================================================
--- clang/lib/Basic/Targets/X86.h
+++ clang/lib/Basic/Targets/X86.h
@@ -302,12 +302,14 @@
                             DiagnosticsEngine &Diags) override;
 
   StringRef getABI() const override {
-    if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F)
+    llvm::Triple::ArchType Arch = getTriple().getArch();
+    if ((Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86) &&
+        SSELevel >= AVX512F)
       return "avx512";
-    if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX)
+    if ((Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86) &&
+        SSELevel >= AVX)
       return "avx";
-    if (getTriple().getArch() == llvm::Triple::x86 &&
-        MMX3DNowLevel == NoMMX3DNow)
+    if (Arch == llvm::Triple::x86 && MMX3DNowLevel == NoMMX3DNow)
       return "no-mmx";
     return "";
   }
Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1023,6 +1023,9 @@
 // X86-32 ABI Implementation
 //===----------------------------------------------------------------------===//
 
+/// The AVX ABI level for X86 targets.
+enum class X86AVXABILevel { None, AVX, AVX512 };
+
 /// Similar to llvm::CCState, but for Clang.
 struct CCState {
   CCState(CGFunctionInfo &FI)
@@ -1053,7 +1056,9 @@
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
+  X86AVXABILevel AVXLevel;
 
   static bool isRegisterSize(unsigned Size) {
     return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
@@ -1112,13 +1117,15 @@
 
   X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                 bool RetSmallStructInRegABI, bool Win32StructABI,
-                unsigned NumRegisterParameters, bool SoftFloatABI)
-    : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
-      IsRetSmallStructInRegABI(RetSmallStructInRegABI),
-      IsWin32StructABI(Win32StructABI),
-      IsSoftFloatABI(SoftFloatABI),
-      IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
-      DefaultNumRegisterParameters(NumRegisterParameters) {}
+                unsigned NumRegisterParameters, bool SoftFloatABI,
+                X86AVXABILevel AVXLevel)
+      : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
+        IsRetSmallStructInRegABI(RetSmallStructInRegABI),
+        IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
+        IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+        IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
+        DefaultNumRegisterParameters(NumRegisterParameters),
+        AVXLevel(AVXLevel) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
@@ -1139,10 +1146,11 @@
 public:
   X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                           bool RetSmallStructInRegABI, bool Win32StructABI,
-                          unsigned NumRegisterParameters, bool SoftFloatABI)
+                          unsigned NumRegisterParameters, bool SoftFloatABI,
+                          X86AVXABILevel AVXLevel)
       : TargetCodeGenInfo(new X86_32ABIInfo(
             CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
-            NumRegisterParameters, SoftFloatABI)) {}
+            NumRegisterParameters, SoftFloatABI, AVXLevel)) {}
 
   static bool isStructReturnInRegABI(
       const llvm::Triple &Triple, const CodeGenOptions &Opts);
@@ -1538,6 +1546,29 @@
   if (Align <= MinABIStackAlignInBytes)
     return 0; // Use default alignment.
 
+  if (IsLinuxABI) {
+    // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't
+    // want to spend any effort dealing with the ramifications of ABI breaks.
+    // If the target only supports doesn't support avx, return 16.
+    // If the target supports avx or avx512, __m256 will align to 32 bytes.
+    // __m512 will align to 64 bytes when the target supports avx512, align to
+    // 32 bytes when the target supports avx and 16 for the other.
+    if (Ty->getAs<VectorType>()) {
+      int TypeSize = getContext().getTypeSize(Ty);
+      if (TypeSize == 128)
+        return Align;
+      else if (TypeSize == 256)
+        return (AVXLevel == X86AVXABILevel::AVX ||
+                AVXLevel == X86AVXABILevel::AVX512)
+                   ? Align
+                   : 16;
+      else
+        return AVXLevel == X86AVXABILevel::AVX512
+                   ? Align
+                   : AVXLevel == X86AVXABILevel::AVX ? 32 : 16;
+    } else
+      return MinABIStackAlignInBytes;
+  }
   // On non-Darwin, the stack type alignment is always 4.
   if (!IsDarwinVectorABI) {
     // Set explicit alignment, since we may need to realign the top.
@@ -2086,12 +2117,6 @@
 
 
 namespace {
-/// The AVX ABI level for X86 targets.
-enum class X86AVXABILevel {
-  None,
-  AVX,
-  AVX512
-};
 
 /// \p returns the size in bits of the largest (native) vector for \p AVXLevel.
 static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
@@ -2432,11 +2457,12 @@
 
 class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
 public:
-  WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
-        bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
-        unsigned NumRegisterParameters)
-    : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
-        Win32StructABI, NumRegisterParameters, false) {}
+  WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
+                             bool RetSmallStructInRegABI, bool Win32StructABI,
+                             unsigned NumRegisterParameters)
+      : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
+                                Win32StructABI, NumRegisterParameters, false,
+                                X86AVXABILevel::None) {}
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &CGM) const override;
@@ -10327,6 +10353,12 @@
         X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
     bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
 
+    StringRef ABI = getTarget().getABI();
+    X86AVXABILevel AVXLevel =
+        (ABI == "avx512"
+             ? X86AVXABILevel::AVX512
+             : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None);
+
     if (Triple.getOS() == llvm::Triple::Win32) {
       return SetCGInfo(new WinX86_32TargetCodeGenInfo(
           Types, IsDarwinVectorABI, RetSmallStructInRegABI,
@@ -10335,7 +10367,7 @@
       return SetCGInfo(new X86_32TargetCodeGenInfo(
           Types, IsDarwinVectorABI, RetSmallStructInRegABI,
           IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
-          CodeGenOpts.FloatABI == "soft"));
+          CodeGenOpts.FloatABI == "soft", AVXLevel));
     }
   }
 
Index: clang/test/CodeGen/x86_32-align-linux-avx2.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux-avx2.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 31
+// CHECK-NEXT:  %2 = and i32 %1, -32
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 31
+// CHECK-NEXT:  %2 = and i32 %1, -32
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
Index: clang/test/CodeGen/x86_32-align-linux-avx512f.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux-avx512f.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 63
+// CHECK-NEXT:  %2 = and i32 %1, -64
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
Index: clang/test/CodeGen/x86_32-align-linux.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,46 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s
+// RUN: FileCheck < %t %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT:  %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT:  %1 = add i32 %0, 15
+// CHECK-NEXT:  %2 = and i32 %1, -16
+// CHECK-NEXT:  %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}