Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -36030,8 +36030,8 @@
     case 'v':
     case 'Y':
     case 'l':
-      return C_RegisterClass;
     case 'k': // AVX512 masking registers.
+      return C_RegisterClass;
     case 'a':
     case 'b':
     case 'c':
@@ -36063,8 +36063,15 @@
       switch (Constraint[1]) {
       default:
         break;
-      case 'k':
+      case 'z':
+      case '0':
         return C_Register;
+      case 'i':
+      case 'm':
+      case 'k':
+      case 't':
+      case '2':
+        return C_RegisterClass;
       }
     }
   }
@@ -36112,15 +36119,42 @@
     if (type->isX86_MMXTy() && Subtarget.hasMMX())
       weight = CW_SpecificReg;
     break;
-  case 'Y':
-    // Other "Y" (e.g. "Yk") constraints should be implemented below.
-    if (constraint[1] == 'k') {
-      // Support for 'Yk' (similarly to the 'k' variant below).
-      weight = CW_SpecificReg;
+  case 'Y': {
+    unsigned Size = StringRef(constraint).size();
+    // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y'
+    char NextChar = Size == 2 ? constraint[1] : 'i';
+    if (Size > 2) break;
+    switch (NextChar) {
+    default:
+      return CW_Invalid;
+    // XMM0
+    case 'z':
+    case '0':
+      if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
+        return CW_SpecificReg;
+      return CW_Invalid;
+    // Conditional OpMask regs (AVX512)
+    case 'k':
+      if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
+        return CW_Register;
+      return CW_Invalid;
+    // Any MMX reg
+    case 'm':
+      if (type->isX86_MMXTy() && Subtarget.hasMMX())
+        return weight;
+      return CW_Invalid;
+    // Any SSE reg when ISA >= SSE2, same as 'Y'
+    case 'i':
+    case 't':
+    case '2':
+      if (!Subtarget.hasSSE2())
+        return CW_Invalid;
+      break;
     }
-    // Else fall through (handle "Y" constraint).
+    // Fall through (handle "Y" constraint).
     LLVM_FALLTHROUGH;
+  }
   case 'v':
     if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
       weight = CW_Register;
@@ -36132,7 +36166,8 @@
     break;
   case 'k':
     // Enable conditional vector operations using %k<#> registers.
-    weight = CW_SpecificReg;
+    if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
+      weight = CW_Register;
     break;
   case 'I':
     if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
@@ -36534,6 +36569,17 @@
       switch (Constraint[1]) {
       default:
         break;
+      case 'i':
+      case 't':
+      case '2':
+        return getRegForInlineAsmConstraint(TRI, "Y", VT);
+      case 'm':
+        if (!Subtarget.hasMMX()) break;
+        return std::make_pair(0U, &X86::VR64RegClass);
+      case 'z':
+      case '0':
+        if (!Subtarget.hasSSE1()) break;
+        return std::make_pair(X86::XMM0, &X86::VR128RegClass);
       case 'k':
         // This register class doesn't allocate k0 for masked vector operation.
         if (Subtarget.hasAVX512()) { // Only supported in AVX512.
Index: test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
===================================================================
--- test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
+++ test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
@@ -0,0 +1,83 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu skx < %s | FileCheck %s
+; This test complements the .c test under clang/test/CodeGen/. We check
+; if the inline asm constraints are respected in the generated code.
+
+; Function Attrs: nounwind
+define void @f_Ym(i64 %m.coerce) {
+; Any MMX register constraint
+; CHECK-LABEL: f_Ym:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: movq %mm{{[0-9]+}}, %mm1
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call x86_mmx asm sideeffect "movq $0, %mm1\0A\09", "=^Ym,~{dirflag},~{fpsr},~{flags}"()
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yi(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled (GCC: when inter-unit moves are enabled)
+; CHECK-LABEL: f_Yi:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yi,^Yi,^Yi,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yt(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled
+; CHECK-LABEL: f_Yt:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yt,^Yt,^Yt,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Y2(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled
+; CHECK-LABEL: f_Y2:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Y2,^Y2,^Y2,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yz(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; xmm0 SSE register (GCC)
+; CHECK-LABEL: f_Yz:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+entry:
+  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Yz,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Y0(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; xmm0 SSE register
+; CHECK-LABEL: f_Y0:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Y0,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
  ret void
+}
+
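For context, the source-level usage these constraints correspond to might look like the hypothetical C sketch below. The file name, function names, and compile flags are illustrative only (not part of the patch); it assumes a front end, such as the clang counterpart test mentioned in the .ll file, that lowers GCC-style "Yi"/"Yz" operand constraints to the "=^Yi"/"=^Yz" IR constraint strings exercised above, and an AVX-capable target so vpaddq is encodable (e.g. clang -S -O2 -march=skylake-avx512 y-constraints.c).

    /* y-constraints.c - hypothetical illustration of GCC 'Y' constraint variants */
    #include <immintrin.h>

    __m128i add_any_sse(__m128i a, __m128i b) {
      __m128i r;
      /* "Yi": any SSE register (requires SSE2 and inter-unit moves enabled). */
      __asm__("vpaddq %2, %1, %0" : "=Yi"(r) : "Yi"(a), "Yi"(b));
      return r;
    }

    __m128i add_in_xmm0(__m128i a, __m128i b) {
      __m128i r;
      /* "Yz": the output is pinned to %xmm0, as the f_Yz/f_Y0 checks expect. */
      __asm__("vpaddq %2, %1, %0" : "=Yz"(r) : "Yi"(a), "Yi"(b));
      return r;
    }

Each asm statement should then appear between the "## InlineAsm Start"/"## InlineAsm End" markers with operands drawn from the register class the constraint requests, which is what the FileCheck patterns above verify.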