Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -917,10 +917,10 @@ /// IsX86_MMXType - Return true if this is an MMX type. bool IsX86_MMXType(llvm::Type *IRType) { - // Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>. + // Return true if the type is an MMX type <1 x i64>, <2 x i32>, <4 x i16>, + // or <8 x i8>. return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 && - cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() && - IRType->getScalarSizeInBits() != 64; + cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(); } static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, @@ -1011,6 +1011,7 @@ bool IsSoftFloatABI; bool IsMCUABI; unsigned DefaultNumRegisterParameters; + bool IsMMXEnabled; static bool isRegisterSize(unsigned Size) { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); @@ -1070,13 +1071,15 @@ X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) + unsigned NumRegisterParameters, bool SoftFloatABI, + bool MMXEnabled) : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - DefaultNumRegisterParameters(NumRegisterParameters) {} + DefaultNumRegisterParameters(NumRegisterParameters), + IsMMXEnabled(MMXEnabled) {} bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars, bool asReturnValue) const override { @@ -1097,10 +1100,11 @@ public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) + unsigned NumRegisterParameters, bool SoftFloatABI, + bool MMXEnabled = false) : TargetCodeGenInfo(new 
X86_32ABIInfo( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, - NumRegisterParameters, SoftFloatABI)) {} + NumRegisterParameters, SoftFloatABI, MMXEnabled)) {} static bool isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts); @@ -1407,6 +1411,10 @@ return getIndirectReturnResult(RetTy, State); } + if (IsMMXEnabled && IsX86_MMXType(CGT.ConvertType(RetTy))) { + return ABIArgInfo::getDirect(llvm::Type::getX86_MMXTy(getVMContext())); + } + return ABIArgInfo::getDirect(); } @@ -1711,8 +1719,11 @@ Size)); } - if (IsX86_MMXType(CGT.ConvertType(Ty))) - return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); + if (IsX86_MMXType(CGT.ConvertType(Ty))) { + if (IsMMXEnabled) + return ABIArgInfo::getDirect(llvm::Type::getX86_MMXTy(getVMContext())); + return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64)); + } return ABIArgInfo::getDirect(); } @@ -9475,10 +9486,12 @@ Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters)); } else { + // The System V i386 ABI requires __m64 values to be passed in MMX registers. 
+ bool EnableMMX = getContext().getTargetInfo().getABI() != "no-mmx"; return SetCGInfo(new X86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, - CodeGenOpts.FloatABI == "soft")); + CodeGenOpts.FloatABI == "soft", EnableMMX)); } } Index: test/CodeGen/vector.c =================================================================== --- test/CodeGen/vector.c +++ test/CodeGen/vector.c @@ -78,5 +78,5 @@ return y; } -// CHECK: define void @lax_vector_compare2(<2 x i32>* {{.*sret.*}}, i64 {{.*}}, i64 {{.*}}) +// CHECK: define void @lax_vector_compare2(<2 x i32>* {{.*sret.*}}, i64 {{.*}}, x86_mmx {{.*}}) // CHECK: icmp eq <2 x i32> Index: test/CodeGen/x86_32-arguments-darwin.c =================================================================== --- test/CodeGen/x86_32-arguments-darwin.c +++ test/CodeGen/x86_32-arguments-darwin.c @@ -229,7 +229,7 @@ // CHECK-LABEL: define void @f56( // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1, -// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4, +// CHECK: x86_mmx %a2.coerce, %struct.s56_1* byval align 4, // CHECK: i64 %a4.coerce, %struct.s56_2* byval align 4, // CHECK: <4 x i32> %a6, %struct.s56_3* byval align 16 %a7, // CHECK: <2 x double> %a8, %struct.s56_4* byval align 16 %a9, @@ -238,7 +238,7 @@ // CHECK: call void (i32, ...) 
@f56_0(i32 1, // CHECK: i32 %{{[^ ]*}}, %struct.s56_0* byval align 4 %{{[^ ]*}}, -// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}}, +// CHECK: x86_mmx %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}}, // CHECK: i64 %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}}, // CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 16 %{{[^ ]*}}, // CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 16 %{{[^ ]*}}, Index: test/CodeGen/x86_32-arguments-linux.c =================================================================== --- test/CodeGen/x86_32-arguments-linux.c +++ test/CodeGen/x86_32-arguments-linux.c @@ -3,7 +3,7 @@ // CHECK-LABEL: define void @f56( // CHECK: i8 signext %a0, %struct.s56_0* byval align 4 %a1, -// CHECK: i64 %a2.coerce, %struct.s56_1* byval align 4, +// CHECK: x86_mmx %a2.coerce, %struct.s56_1* byval align 4, // CHECK: <1 x double> %a4, %struct.s56_2* byval align 4, // CHECK: <4 x i32> %a6, %struct.s56_3* byval align 4, // CHECK: <2 x double> %a8, %struct.s56_4* byval align 4, @@ -12,7 +12,7 @@ // CHECK: call void (i32, ...) 
@f56_0(i32 1, // CHECK: i32 %{{.*}}, %struct.s56_0* byval align 4 %{{[^ ]*}}, -// CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}}, +// CHECK: x86_mmx %{{[^ ]*}}, %struct.s56_1* byval align 4 %{{[^ ]*}}, // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval align 4 %{{[^ ]*}}, // CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval align 4 %{{[^ ]*}}, // CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval align 4 %{{[^ ]*}}, Index: test/CodeGen/x86_32-m64-darwin.c =================================================================== --- /dev/null +++ test/CodeGen/x86_32-m64-darwin.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -w -fblocks -triple i386-apple-darwin9 -target-cpu yonah -target-feature +mmx -emit-llvm -O2 -o - %s | FileCheck %s + +// CHECK-LABEL: define i64 @caller(i64 %__m1.coerce, i64 %__m2.coerce) +// CHECK: tail call void @callee(i64 %__m2.coerce, i64 %__m1.coerce) +// CHECK: ret i64 + +#include <mmintrin.h> +__m64 m64; +void callee(__m64 __m1, __m64 __m2); +__m64 caller(__m64 __m1, __m64 __m2) +{ + callee(__m2, __m1); + return m64; +} Index: test/CodeGen/x86_32-m64-iamcu.c =================================================================== --- /dev/null +++ test/CodeGen/x86_32-m64-iamcu.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -w -triple i386-pc-elfiamcu -mfloat-abi soft -emit-llvm -O2 -o - %s | FileCheck %s + +// CHECK-LABEL: define <1 x i64> @caller(i64 %__m1.coerce, i64 %__m2.coerce) +// CHECK: tail call void @callee(i64 %__m2.coerce, i64 %__m1.coerce) +// CHECK: ret <1 x i64> + +#include <mmintrin.h> +__m64 m64; +void callee(__m64 __m1, __m64 __m2); +__m64 caller(__m64 __m1, __m64 __m2) +{ + callee(__m2, __m1); + return m64; +} Index: test/CodeGen/x86_32-m64-linux.c =================================================================== --- /dev/null +++ test/CodeGen/x86_32-m64-linux.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -w -O2 -fblocks -triple i386-pc-linux-gnu -target-cpu pentium4 -emit-llvm -o %t %s +// RUN: FileCheck < %t %s + +// CHECK-LABEL: define x86_mmx 
@caller(x86_mmx %__m1.coerce, x86_mmx %__m2.coerce) +// CHECK: tail call void @callee(x86_mmx %__m2.coerce, x86_mmx %__m1.coerce) +// CHECK: ret x86_mmx + +#include <mmintrin.h> +__m64 m64; +void callee(__m64 __m1, __m64 __m2); +__m64 caller(__m64 __m1, __m64 __m2) +{ + callee(__m2, __m1); + return m64; +} Index: test/CodeGen/x86_32-m64-win32.c =================================================================== --- /dev/null +++ test/CodeGen/x86_32-m64-win32.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -O2 -o - %s | FileCheck %s + +// CHECK-LABEL: define dso_local <1 x i64> @caller(i64 %__m1.coerce, i64 %__m2.coerce) +// CHECK: call void @callee(i64 %__m2.coerce, i64 %__m1.coerce) +// CHECK: ret <1 x i64> + +#include <mmintrin.h> +__m64 m64; +void callee(__m64 __m1, __m64 __m2); +__m64 caller(__m64 __m1, __m64 __m2) +{ + callee(__m2, __m1); + return m64; +}