Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -35623,8 +35623,14 @@
       switch (Constraint[1]) {
       default:
         break;
-      case 'k':
+      case 'z':
+      case '0':
         return C_Register;
+      case 'k':
+      case 'm':
+      case '2':
+      case 'i':
+        return C_RegisterClass;
       }
     }
   }
@@ -35673,10 +35679,29 @@
       weight = CW_SpecificReg;
     break;
   case 'Y':
-    // Other "Y" (e.g. "Yk") constraints should be implemented below.
-    if (constraint[1] == 'k') {
-      // Support for 'Yk' (similarly to the 'k' variant below).
-      weight = CW_SpecificReg;
+    // "Y" (e.g. "Yk") constraints should be implemented below.
+    switch(constraint[1]){
+    default:
+      break;
+    case 'z': // First SSE register xmm0
+    case '0':
+      if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
+        weight = CW_SpecificReg;
+      break;
+    case '2': // Any SSE register when SSE2 is enabled
+    case 'i':
+      if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE2())
+        weight = CW_SpecificReg;
+      break;
+    case 'm': // Any MMX when "inter-unit moves" are enabled, a reference to
+              // gcc optimization directive for older AMD cpus.
+      if (type->isX86_MMXTy() && Subtarget.hasMMX())
+        weight = CW_SpecificReg;
+      break;
+    // Support for 'Yk' constraint, similarly to the 'k' variant below.
+    // Denotes specific use of %k1-k7.
+    case 'k':
+      weight = CW_SpecificReg;
       break;
     }
     // Else fall through (handle "Y" constraint).
@@ -36091,9 +36116,22 @@
       break;
     }
   } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
+    char EqualSingelCharConstraint;
     switch (Constraint[1]) {
     default:
       break;
+    case 'i': // Any SSE register when SSE2 is enabled (GCC).
+    case '2': // Any SSE register when SSE2 is enabled.
+    case 'm': // Any mmx register when inter-unit moves are enabled.
+      // These are practically identical to their single letter counterparts.
+      // Map the two-letter constraints to: 'Yi','Y2' -> 'Y' | 'Ym' -> 'y'
+      EqualSingelCharConstraint = (Constraint[1] == 'm' ? 'y': 'Y');
+      return getRegForInlineAsmConstraint(
+          TRI, StringRef(&EqualSingelCharConstraint, 1), VT);
+    case 'z': // %xmm0
+    case '0':
+      if (!Subtarget.hasSSE1()) break;
+      return std::make_pair(X86::XMM0, &X86::VR128RegClass);
     case 'k':
       // This register class doesn't allocate k0 for masked vector operation.
       if (Subtarget.hasAVX512()) { // Only supported in AVX512.
Index: test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
===================================================================
--- test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
+++ test/MC/X86/x86-GCC-inline-asm-Y-constraints.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu skx < %s | FileCheck %s
+; This test complements the .c test under clang/test/CodeGen/. We check
+; if the inline asm constraints are respected in the generated code.
+
+; Function Attrs: nounwind
+define void @f_Ym(i64 %m.coerce) {
+; Any mmx register constraint
+; CHECK-LABEL: f_Ym:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: movq %mm{{[0-9]+}}, %mm1
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call x86_mmx asm sideeffect "movq $0, %mm1\0A\09", "=^Ym,~{dirflag},~{fpsr},~{flags}"()
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yi(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled (GCC when inter-unit moves enabled)
+; CHECK-LABEL: f_Yi:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yi,^Yi,^Yi,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Y2(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; Any SSE register when SSE2 is enabled
+; CHECK-LABEL: f_Y2:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Y2,^Y2,^Y2,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Yz(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; xmm0 SSE register (GCC)
+; CHECK-LABEL: f_Yz:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+entry:
+  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Yz,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @f_Y0(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; xmm0 SSE register
+; CHECK-LABEL: f_Y0:
+; CHECK: ## InlineAsm Start
+; CHECK-NEXT: vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
+; CHECK-NEXT: vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; CHECK: ## InlineAsm End
+
+entry:
+  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Y0,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
+  ret void
+}
\ No newline at end of file