diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -125,7 +125,10 @@
 ABI Changes in Clang
 --------------------
 
-- ...
+- gcc passes vectors of __int128 in memory on X86-64. Clang historically
+  broke the vectors into multiple scalars using two 64-bit values for each
+  element. Clang now matches the gcc behavior on Linux and NetBSD. You can
+  switch back to old API behavior with flag: -fclang-abi-compat=9.0.
 
 OpenMP Support in Clang
 -----------------------
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -138,6 +138,12 @@
     /// rather than returning the required alignment.
     Ver7,
 
+    /// Attempt to be ABI-compatible with code generated by Clang 9.0.x
+    /// (SVN r351319). This causes vectors of __int128 to be passed in memory
+    /// instead of passing in multiple scalar registers on x86_64 on Linux and
+    /// NetBSD.
+    Ver9,
+
     /// Conform to the underlying platform's C and C++ ABIs as closely
     /// as we can.
     Latest
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -2177,6 +2177,17 @@
     return true;
   }
 
+  // GCC classifies vectors of __int128 as memory.
+  bool passInt128VectorsInMem() const {
+    // Clang <= 9.0 did not do this.
+    if (getContext().getLangOpts().getClangABICompat() <=
+        LangOptions::ClangABI::Ver9)
+      return false;
+
+    const llvm::Triple &T = getTarget().getTriple();
+    return T.isOSLinux() || T.isOSNetBSD();
+  }
+
   X86AVXABILevel AVXLevel;
   // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
   // 64-bit hardware.
@@ -2657,6 +2668,14 @@
         Hi = Lo;
     } else if (Size == 128 ||
                (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
+      QualType ElementType = VT->getElementType();
+
+      // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
+      if (passInt128VectorsInMem() && Size != 128 &&
+          (ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
+           ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
+        return;
+
       // Arguments of 256-bits are split into four eightbyte chunks. The
       // least significant one belongs to class SSE and all the others to class
       // SSEUP. The original Lo and Hi design considers that types can't be
@@ -2899,6 +2918,11 @@
     unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
     if (Size <= 64 || Size > LargestVector)
       return true;
+    QualType EltTy = VecTy->getElementType();
+    if (passInt128VectorsInMem() &&
+        (EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
+         EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
+      return true;
   }
 
   return false;
@@ -2973,14 +2997,28 @@
     Ty = QualType(InnerTy, 0);
 
   llvm::Type *IRType = CGT.ConvertType(Ty);
-  if (isa<llvm::VectorType>(IRType) ||
-      IRType->getTypeID() == llvm::Type::FP128TyID)
+  if (isa<llvm::VectorType>(IRType)) {
+    // Don't pass vXi128 vectors in their native type, the backend can't
+    // legalize them.
+    if (passInt128VectorsInMem() &&
+        IRType->getVectorElementType()->isIntegerTy(128)) {
+      // Use a vXi64 vector.
+      uint64_t Size = getContext().getTypeSize(Ty);
+      return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
+                                   Size / 64);
+    }
+
+    return IRType;
+  }
+
+  if (IRType->getTypeID() == llvm::Type::FP128TyID)
     return IRType;
 
   // We couldn't find the preferred IR vector type for 'Ty'.
   uint64_t Size = getContext().getTypeSize(Ty);
   assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
 
+
   // Return a LLVM IR vector type based on the size of 'Ty'.
   return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
                                Size / 64);
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -3129,6 +3129,8 @@
         Opts.setClangABICompat(LangOptions::ClangABI::Ver6);
       else if (Major <= 7)
         Opts.setClangABICompat(LangOptions::ClangABI::Ver7);
+      else if (Major <= 9)
+        Opts.setClangABICompat(LangOptions::ClangABI::Ver9);
     } else if (Ver != "latest") {
       Diags.Report(diag::err_drv_invalid_value)
           << A->getAsString(Args) << A->getValue();
diff --git a/clang/test/CodeGen/pr42607.c b/clang/test/CodeGen/pr42607.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/pr42607.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,MEM256ALIGN32,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,MEM256ALIGN32,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN16,MEM512ALIGN16
+// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN32,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN32,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN32,MEM512ALIGN64
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN32
+// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN64
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN64
+
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,NEWABI512
+// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,NEWABI512
+// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512
+// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512
+// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512
+// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512
+
+typedef unsigned long long v16u64 __attribute__((vector_size(16)));
+typedef unsigned __int128 v16u128 __attribute__((vector_size(16)));
+
+v16u64 test_v16u128(v16u64 a, v16u128 b) {
+// NEWABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+// OLDABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
+  return a + (v16u64)b;
+}
+
+typedef unsigned long long v32u64 __attribute__((vector_size(32)));
+typedef unsigned __int128 v32u128 __attribute__((vector_size(32)));
+
+v32u64 test_v32u128(v32u64 a, v32u128 b) {
+// MEM256ALIGN16: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 16 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 16 %{{.*}})
+// MEM256ALIGN32: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 32 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
+// NEWABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
+// OLDABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
+  return a + (v32u64)b;
+}
+
+typedef unsigned long long v64u64 __attribute__((vector_size(64)));
+typedef unsigned __int128 v64u128 __attribute__((vector_size(64)));
+
+v64u64 test_v64u128(v64u64 a, v64u128 b) {
+// MEM512ALIGN16: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 16 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 16 %{{.*}})
+// MEM512ALIGN32: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 32 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 32 %{{.*}})
+// MEM512ALIGN64: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 64 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
+// NEWABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
+// OLDABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
+  return a + (v64u64)b;
+}