Index: lib/Basic/Targets/X86.h =================================================================== --- lib/Basic/Targets/X86.h +++ lib/Basic/Targets/X86.h @@ -437,9 +437,12 @@ // In case the constraint is 'r' we need to return Expression case 'r': return Expression; + // Double letters Y constraints + case 'Y': + if ((++I != E) && ((*I == '0') || (*I == 'z'))) + return "xmm0"; default: - // Default value if there is no constraint for the register - return ""; + break; } return ""; } Index: lib/Basic/Targets/X86.cpp =================================================================== --- lib/Basic/Targets/X86.cpp +++ lib/Basic/Targets/X86.cpp @@ -1386,7 +1386,9 @@ switch (*Name) { default: return false; + case 'z': case '0': // First SSE register. + case '2': case 't': // Any SSE register, when SSE2 is enabled. case 'i': // Any SSE register, when SSE2 and inter-unit moves enabled. case 'm': // Any MMX register, when inter-unit moves enabled. @@ -1455,33 +1457,39 @@ case 't': case 'u': return Size <= 128; - case 'v': - case 'x': - if (SSELevel >= AVX512F) - // 512-bit zmm registers can be used if target supports AVX512F. - return Size <= 512U; - else if (SSELevel >= AVX) - // 256-bit ymm registers can be used if target supports AVX. - return Size <= 256U; - return Size <= 128U; case 'Y': // 'Y' is the first character for several 2-character constraints. switch (Constraint[1]) { default: - break; + return false; case 'm': // 'Ym' is synonymous with 'y'. case 'k': return Size <= 64; + case 'z': + case '0': + // XMM0 + if (SSELevel >= SSE1) + return Size <= 128U; + return false; case 'i': case 't': - // 'Yi' and 'Yt' are synonymous with 'x' when SSE2 is enabled. - if (SSELevel >= AVX512F) - return Size <= 512U; - else if (SSELevel >= AVX) - return Size <= 256U; - return SSELevel >= SSE2 && Size <= 128U; + case '2': + // 'Yi','Yt','Y2' are synonymous with 'x' when SSE2 is enabled. 
+ if (SSELevel < SSE2) + return false; + break; } + case 'v': + case 'x': + if (SSELevel >= AVX512F) + // 512-bit zmm registers can be used if target supports AVX512F. + return Size <= 512U; + else if (SSELevel >= AVX) + // 256-bit ymm registers can be used if target supports AVX. + return Size <= 256U; + return Size <= 128U; + } return true; @@ -1515,6 +1523,12 @@ // the return string. break; case 'k': + case 'm': + case 'i': + case 't': + case 'z': + case '0': + case '2': // "^" hints llvm that this is a 2 letter constraint. // "Constraint++" is used to promote the string iterator // to the next constraint. Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -22,6 +22,7 @@ #include "clang/CodeGen/SwiftCallingConv.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" @@ -870,7 +871,10 @@ static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF, StringRef Constraint, llvm::Type* Ty) { - if ((Constraint == "y" || Constraint == "&y") && Ty->isVectorTy()) { + bool IsMMXCons = llvm::StringSwitch(Constraint) + .Cases("y", "&y", "^Ym", true) + .Default(false); + if (IsMMXCons && Ty->isVectorTy()) { if (cast(Ty)->getBitWidth() != 64) { // Invalid MMX constraint return nullptr; Index: test/CodeGen/x86-GCC-inline-asm-Y-constraints.c =================================================================== --- test/CodeGen/x86-GCC-inline-asm-Y-constraints.c +++ test/CodeGen/x86-GCC-inline-asm-Y-constraints.c @@ -0,0 +1,68 @@ +// RUN: %clang_cc1 -ffreestanding -triple=x86_64-apple-darwin -target-cpu skx %s -emit-llvm -o - | FileCheck %s +#include +// This test is complemented by the .ll test under llvm/test/MC/X86/. 
+// At this level we can only check if the constraints are passed correctly +// from inline asm to llvm IR. + +// CHECK-LABEL: @f_Ym +void f_Ym(__m64 m) + { + // CHECK: movq $0, %mm1 + // CHECK-SAME: "=^Ym,~{dirflag},~{fpsr},~{flags}" + __asm__ volatile ("movq %0, %%mm1\n\t" + :"=Ym" (m)); +} + +// CHECK-LABEL: f_Yi +void f_Yi(__m128 x, __m128 y, __m128 z) + { + // CHECK: vpaddq + // CHECK-SAME: "=^Yi,^Yi,^Yi,~{dirflag},~{fpsr},~{flags}" + __asm__ volatile ("vpaddq %0, %1, %2\n\t" + :"=Yi" (x) + :"Yi" (y),"Yi"(z)); +} + +// CHECK-LABEL: f_Yt +void f_Yt(__m128 x, __m128 y, __m128 z) + { + // CHECK: vpaddq + // CHECK-SAME: "=^Yt,^Yt,^Yt,~{dirflag},~{fpsr},~{flags}" + __asm__ volatile ("vpaddq %0, %1, %2\n\t" + :"=Yt" (x) + :"Yt" (y),"Yt"(z)); +} + +// CHECK-LABEL: f_Y2 +void f_Y2(__m128 x, __m128 y, __m128 z) + { + // CHECK: vpaddq + // CHECK-SAME: "=^Y2,^Y2,^Y2,~{dirflag},~{fpsr},~{flags}" + __asm__ volatile ("vpaddq %0, %1, %2\n\t" + :"=Y2" (x) + :"Y2" (y),"Y2"(z)); +} + +// CHECK-LABEL: f_Yz +void f_Yz(__m128 x, __m128 y, __m128 z) + { + // CHECK: vpaddq + // CHECK-SAME: vpaddq + // CHECK-SAME: "=^Yi,=^Yz,^Yi,0,~{dirflag},~{fpsr},~{flags}" + __asm__ volatile ("vpaddq %0,%2,%1\n\t" + "vpaddq %1,%0,%2\n\t" + :"+Yi"(z),"=Yz" (x) + :"Yi" (y) ); +} + +// CHECK-LABEL: f_Y0 +void f_Y0(__m128 x, __m128 y, __m128 z) + { + // CHECK: vpaddq + // CHECK-SAME: "=^Yi,=^Y0,^Yi,0,~{dirflag},~{fpsr},~{flags}" + __asm__ volatile ("vpaddq %0,%2,%1\n\t" + "vpaddq %1,%0,%2\n\t" + :"+Yi"(z),"=Y0" (x) + :"Yi" (y) ); +} +