Index: cfe/trunk/lib/CodeGen/TargetInfo.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp
@@ -915,14 +915,6 @@
                                            : ABIArgInfo::getDirect());
 }
 
-/// IsX86_MMXType - Return true if this is an MMX type.
-bool IsX86_MMXType(llvm::Type *IRType) {
-  // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
-  return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
-    cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
-    IRType->getScalarSizeInBits() != 64;
-}
-
 static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
                                           StringRef Constraint,
                                           llvm::Type* Ty) {
@@ -1011,6 +1003,7 @@
   bool IsSoftFloatABI;
   bool IsMCUABI;
   unsigned DefaultNumRegisterParameters;
+  bool IsMMXEnabled;
 
   static bool isRegisterSize(unsigned Size) {
     return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
@@ -1070,13 +1063,15 @@
 
   X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                 bool RetSmallStructInRegABI, bool Win32StructABI,
-                unsigned NumRegisterParameters, bool SoftFloatABI)
+                unsigned NumRegisterParameters, bool SoftFloatABI,
+                bool MMXEnabled)
     : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
       IsRetSmallStructInRegABI(RetSmallStructInRegABI),
       IsWin32StructABI(Win32StructABI),
       IsSoftFloatABI(SoftFloatABI),
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
-      DefaultNumRegisterParameters(NumRegisterParameters) {}
+      DefaultNumRegisterParameters(NumRegisterParameters),
+      IsMMXEnabled(MMXEnabled) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
                                     bool asReturnValue) const override {
@@ -1091,16 +1086,30 @@
     // x86-32 lowering does not support passing swifterror in a register.
     return false;
   }
+
+  bool isPassInMMXRegABI() const {
+    // The System V i386 psABI requires __m64 to be passed in MMX registers.
+    // Clang historically had a bug where it failed to apply this rule, and
+    // some platforms (e.g. Darwin, PS4, and FreeBSD) have opted to maintain
+    // compatibility with the old Clang behavior, so we only apply it on
+    // platforms that have specifically requested it (currently just Linux and
+    // NetBSD).
+    const llvm::Triple &T = getTarget().getTriple();
+    if (IsMMXEnabled && (T.isOSLinux() || T.isOSNetBSD()))
+      return true;
+    return false;
+  }
 };
 
 class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
   X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
                           bool RetSmallStructInRegABI, bool Win32StructABI,
-                          unsigned NumRegisterParameters, bool SoftFloatABI)
+                          unsigned NumRegisterParameters, bool SoftFloatABI,
+                          bool MMXEnabled = false)
       : TargetCodeGenInfo(new X86_32ABIInfo(
             CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
-            NumRegisterParameters, SoftFloatABI)) {}
+            NumRegisterParameters, SoftFloatABI, MMXEnabled)) {}
 
   static bool isStructReturnInRegABI(
       const llvm::Triple &Triple, const CodeGenOptions &Opts);
@@ -1386,10 +1395,9 @@
   }
 
   if (const VectorType *VT = RetTy->getAs<VectorType>()) {
+    uint64_t Size = getContext().getTypeSize(RetTy);
     // On Darwin, some vectors are returned in registers.
     if (IsDarwinVectorABI) {
-      uint64_t Size = getContext().getTypeSize(RetTy);
-
       // 128-bit vectors are a special case; they are returned in
       // registers and we need to make sure to pick a type the LLVM
       // backend will like.
@@ -1407,6 +1415,10 @@
       return getIndirectReturnResult(RetTy, State);
     }
 
+    if (VT->getElementType()->isIntegerType() && Size == 64 &&
+        isPassInMMXRegABI())
+      return ABIArgInfo::getDirect(llvm::Type::getX86_MMXTy(getVMContext()));
+
     return ABIArgInfo::getDirect();
   }
 
@@ -1701,23 +1713,26 @@
   }
 
   if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    uint64_t Size = getContext().getTypeSize(Ty);
     // On Darwin, some vectors are passed in memory, we handle this by passing
     // it as an i8/i16/i32/i64.
     if (IsDarwinVectorABI) {
-      uint64_t Size = getContext().getTypeSize(Ty);
       if ((Size == 8 || Size == 16 || Size == 32) ||
           (Size == 64 && VT->getNumElements() == 1))
         return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
                                                             Size));
     }
 
-    if (IsX86_MMXType(CGT.ConvertType(Ty)))
-      return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
-
+    if (VT->getElementType()->isIntegerType() && Size == 64) {
+      if (isPassInMMXRegABI())
+        return ABIArgInfo::getDirect(llvm::Type::getX86_MMXTy(getVMContext()));
+      else
+        return ABIArgInfo::getDirect(
+          llvm::IntegerType::get(getVMContext(), 64));
+    }
     return ABIArgInfo::getDirect();
   }
 
-
   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
     Ty = EnumTy->getDecl()->getIntegerType();
 
@@ -9460,10 +9475,11 @@
           Types, IsDarwinVectorABI, RetSmallStructInRegABI,
           IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
     } else {
+      bool EnableMMX = getContext().getTargetInfo().getABI() != "no-mmx";
       return SetCGInfo(new X86_32TargetCodeGenInfo(
           Types, IsDarwinVectorABI, RetSmallStructInRegABI,
           IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
-          CodeGenOpts.FloatABI == "soft"));
+          CodeGenOpts.FloatABI == "soft", EnableMMX));
     }
   }
 
Index: cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
===================================================================
--- cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
+++ cfe/trunk/test/CodeGen/x86_32-arguments-linux.c
@@ -3,7 +3,7 @@
 
 // CHECK-LABEL: define void @f56(
 // CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
-// CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4,
+// CHECK: x86_mmx %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4,
 // CHECK: <1 x double> %a4, %struct.s56_2* byval(%struct.s56_2) align 4,
 // CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 4,
 // CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 4,
@@ -12,7 +12,7 @@
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
-// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
+// CHECK: x86_mmx %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
 // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
 // CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
 // CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
Index: cfe/trunk/test/CodeGen/x86_32-m64.c
===================================================================
--- cfe/trunk/test/CodeGen/x86_32-m64.c
+++ cfe/trunk/test/CodeGen/x86_32-m64.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-linux-gnu -target-cpu pentium4 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-netbsd -target-cpu pentium4 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,NETBSD
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-apple-darwin9 -target-cpu yonah -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,DARWIN
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-elfiamcu -mfloat-abi soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,IAMCU
+// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN32
+
+#include <mmintrin.h>
+__m64 m64;
+void callee(__m64 __m1, __m64 __m2);
+__m64 caller(__m64 __m1, __m64 __m2)
+{
+// LINUX-LABEL: define x86_mmx @caller(x86_mmx %__m1.coerce, x86_mmx %__m2.coerce)
+// LINUX: tail call void @callee(x86_mmx %__m2.coerce, x86_mmx %__m1.coerce)
+// LINUX: ret x86_mmx
+// NETBSD-LABEL: define x86_mmx @caller(x86_mmx %__m1.coerce, x86_mmx %__m2.coerce)
+// NETBSD: tail call void @callee(x86_mmx %__m2.coerce, x86_mmx %__m1.coerce)
+// NETBSD: ret x86_mmx
+// DARWIN-LABEL: define i64 @caller(i64 %__m1.coerce, i64 %__m2.coerce)
+// DARWIN: tail call void @callee(i64 %__m2.coerce, i64 %__m1.coerce)
+// DARWIN: ret i64
+// IAMCU-LABEL: define <1 x i64> @caller(i64 %__m1.coerce, i64 %__m2.coerce)
+// IAMCU: tail call void @callee(i64 %__m2.coerce, i64 %__m1.coerce)
+// IAMCU: ret <1 x i64>
+// WIN32-LABEL: define dso_local <1 x i64> @caller(i64 %__m1.coerce, i64 %__m2.coerce)
+// WIN32: call void @callee(i64 %__m2.coerce, i64 %__m1.coerce)
+// WIN32: ret <1 x i64>
+  callee(__m2, __m1);
+  return m64;
+}