[X86] Let the 'Yz' inline-asm constraint select YMM0/ZMM0 as well as XMM0

Summary of what this patch does:
  * clang X86.cpp: for constraint letter 'z'/'0' the accepted operand size is
    now 512 bits with "avx512f", 256 bits with "avx", 128 bits with "sse".
  * llvm X86ISelLowering.cpp: the 'z'/'0' weight check returns CW_SpecificReg
    for 128/256/512-bit types when SSE1/AVX/AVX512 is available, and the
    register lookup returns XMM0, YMM0 or ZMM0 according to the value type.
  * Tests are added for the 128-, 256- and 512-bit forms.

Review fixes folded into this revision:
  * X86.cpp comment typo "XMM0/YMM/ZMM0" corrected to "XMM0/YMM0/ZMM0".
  * The "// XMM0" comment above the 'z' weight case now names all three
    registers, matching the condition below it.
  * The 'z' register lookup now also lists MVT::v64i8 and MVT::v32i16 in the
    ZMM0 case. The weight check accepts every 512-bit type under AVX512, so
    without these two entries such operands passed the weight check but got
    no register from the lookup.

NOTE(review): this patch was recovered from a whitespace-collapsed copy, so
the indentation of context/removed lines is reconstructed. Apply with
whitespace-insensitive matching (e.g. `patch -l` or
`git apply --ignore-whitespace`) and confirm against the tree.

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1898,8 +1898,14 @@
       return Size <= 64;
     case 'z':
     case '0':
-      // XMM0
-      if (FeatureMap.lookup("sse"))
+      // XMM0/YMM0/ZMM0
+      if (FeatureMap.lookup("avx512f"))
+        // ZMM0 can be used if target supports AVX512F.
+        return Size <= 512U;
+      else if (FeatureMap.lookup("avx"))
+        // YMM0 can be used if target supports AVX.
+        return Size <= 256U;
+      else if (FeatureMap.lookup("sse"))
         return Size <= 128U;
       return false;
     case 'i':
diff --git a/clang/test/CodeGen/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/x86-inline-asm-v-constraint.c
--- a/clang/test/CodeGen/x86-inline-asm-v-constraint.c
+++ b/clang/test/CodeGen/x86-inline-asm-v-constraint.c
@@ -28,3 +28,28 @@
 #endif
   return _zmm0;
 }
+
+// SSE: call <4 x float> asm "pcmpeqd $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+__m128 testXMM0(void) {
+  __m128 xmm0;
+  __asm__("pcmpeqd %0, %0" :"=Yz"(xmm0));
+  return xmm0;
+}
+
+// AVX: call <8 x float> asm "vpcmpeqd $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+__m256 testYMM0(void) {
+  __m256 ymm0;
+#ifdef AVX
+  __asm__("vpcmpeqd %0, %0, %0" :"=Yz"(ymm0));
+#endif
+  return ymm0;
+}
+
+// AVX512: call <16 x float> asm "vpternlogd $$255, $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+__m512 testZMM0(void) {
+  __m512 zmm0;
+#ifdef AVX512
+  __asm__("vpternlogd $255, %0, %0, %0" :"=Yz"(zmm0));
+#endif
+  return zmm0;
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48046,7 +48046,9 @@
-      // XMM0
+      // XMM0/YMM0/ZMM0
       case 'z':
       case '0':
-        if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
+        if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
+            ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
+            ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
           return CW_SpecificReg;
         return CW_Invalid;
       // Conditional OpMask regs (AVX512)
@@ -48496,6 +48498,8 @@
         if (Subtarget.hasAVX())
           return std::make_pair(0U, &X86::VR256RegClass);
         break;
+      case MVT::v64i8:
+      case MVT::v32i16:
       case MVT::v8f64:
       case MVT::v16f32:
       case MVT::v16i32:
@@ -48521,7 +48525,44 @@
     case 'z':
     case '0':
       if (!Subtarget.hasSSE1()) break;
-      return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+      switch (VT.SimpleTy) {
+      default: break;
+      // Scalar SSE types.
+      case MVT::f32:
+      case MVT::i32:
+        return std::make_pair(X86::XMM0, &X86::FR32RegClass);
+      case MVT::f64:
+      case MVT::i64:
+        return std::make_pair(X86::XMM0, &X86::FR64RegClass);
+      case MVT::f128:
+      case MVT::v16i8:
+      case MVT::v8i16:
+      case MVT::v4i32:
+      case MVT::v2i64:
+      case MVT::v4f32:
+      case MVT::v2f64:
+        return std::make_pair(X86::XMM0, &X86::VR128RegClass);
+      // AVX types.
+      case MVT::v32i8:
+      case MVT::v16i16:
+      case MVT::v8i32:
+      case MVT::v4i64:
+      case MVT::v8f32:
+      case MVT::v4f64:
+        if (Subtarget.hasAVX())
+          return std::make_pair(X86::YMM0, &X86::VR256RegClass);
+        break;
+      case MVT::v64i8:
+      case MVT::v32i16:
+      case MVT::v8f64:
+      case MVT::v16f32:
+      case MVT::v16i32:
+      case MVT::v8i64:
+        if (Subtarget.hasAVX512())
+          return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass);
+        break;
+      }
+      break;
     case 'k':
       // This register class doesn't allocate k0 for masked vector operation.
       if (Subtarget.hasAVX512()) {
diff --git a/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
--- a/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
@@ -134,3 +134,13 @@
   ret <8 x float> %0
 }
 
+define <8 x float> @testYMM0() {
+; CHECK: vpcmpeqd %ymm0, %ymm0, %ymm0
+entry:
+  %ymm0 = alloca <8 x float>, align 32
+  %0 = call <8 x float> asm "vpcmpeqd $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+  store <8 x float> %0, <8 x float>* %ymm0, align 32
+  %1 = load <8 x float>, <8 x float>* %ymm0, align 32
+  ret <8 x float> %1
+}
+
diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
--- a/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
@@ -70,3 +70,12 @@
   ret <16 x float> %0
 }
 
+define <16 x float> @testZMM0() {
+entry:
+; CHECK: vpternlogd $255, %zmm0, %zmm0, %zmm0
+  %zmm0 = alloca <16 x float>, align 64
+  %0 = call <16 x float> asm "vpternlogd $$255, $0, $0, $0", "=^Yz,~{dirflag},~{fpsr},~{flags}"()
+  store <16 x float> %0, <16 x float>* %zmm0, align 64
+  %1 = load <16 x float>, <16 x float>* %zmm0, align 64
+  ret <16 x float> %1
+}