diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -53115,6 +53115,10 @@ switch (VT.SimpleTy) { default: break; // Scalar SSE types. + case MVT::f16: + if (VConstraint && Subtarget.hasFP16()) + return std::make_pair(0U, &X86::FR16XRegClass); + break; case MVT::f32: case MVT::i32: if (VConstraint && Subtarget.hasVLX()) @@ -53133,6 +53137,10 @@ } break; // Vector types and fp128. + case MVT::v8f16: + if (!Subtarget.hasFP16()) + break; + LLVM_FALLTHROUGH; case MVT::f128: case MVT::v16i8: case MVT::v8i16: @@ -53144,6 +53152,10 @@ return std::make_pair(0U, &X86::VR128XRegClass); return std::make_pair(0U, &X86::VR128RegClass); // AVX types. + case MVT::v16f16: + if (!Subtarget.hasFP16()) + break; + LLVM_FALLTHROUGH; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -53155,6 +53167,10 @@ if (Subtarget.hasAVX()) return std::make_pair(0U, &X86::VR256RegClass); break; + case MVT::v32f16: + if (!Subtarget.hasFP16()) + break; + LLVM_FALLTHROUGH; case MVT::v64i8: case MVT::v32i16: case MVT::v8f64: @@ -53184,12 +53200,20 @@ switch (VT.SimpleTy) { default: break; // Scalar SSE types. + case MVT::f16: + if (!Subtarget.hasFP16()) + break; + return std::make_pair(X86::XMM0, &X86::FR16XRegClass); case MVT::f32: case MVT::i32: return std::make_pair(X86::XMM0, &X86::FR32RegClass); case MVT::f64: case MVT::i64: return std::make_pair(X86::XMM0, &X86::FR64RegClass); + case MVT::v8f16: + if (!Subtarget.hasFP16()) + break; + LLVM_FALLTHROUGH; case MVT::f128: case MVT::v16i8: case MVT::v8i16: @@ -53199,6 +53223,10 @@ case MVT::v2f64: return std::make_pair(X86::XMM0, &X86::VR128RegClass); // AVX types. + case MVT::v16f16: + if (!Subtarget.hasFP16()) + break; + LLVM_FALLTHROUGH; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -53208,6 +53236,10 @@ if (Subtarget.hasAVX()) return std::make_pair(X86::YMM0, &X86::VR256RegClass); break; + case MVT::v32f16: + if (!Subtarget.hasFP16()) + break; + LLVM_FALLTHROUGH; case MVT::v64i8: case MVT::v32i16: case MVT::v8f64: @@ -53365,7 +53397,9 @@ // find, ignoring the required type. // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. - if (VT == MVT::f32 || VT == MVT::i32) + if (VT == MVT::f16) + Res.second = &X86::FR16XRegClass; + else if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32XRegClass; else if (VT == MVT::f64 || VT == MVT::i64) Res.second = &X86::FR64XRegClass; diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll --- a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll @@ -1,5 +1,9 @@ -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -stop-after=finalize-isel | FileCheck %s +; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f -stop-after=finalize-isel > %t 2> %t.err +; RUN: FileCheck < %t %s +; RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16 -stop-after=finalize-isel | FileCheck --check-prefixes=CHECK,FP16 %s +; CHECK-LABEL: name: mask_Yk_i8 ; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1 ; CHECK: %[[REG2:.*]]:vr512_0_15 = COPY %2 ; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0 /* attdialect */, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags @@ -9,3 +13,14 @@ %0 = tail call <8 x i64> asm "vpaddq\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <8 x i64> %x, <8 x i64> %y) ret <8 x i64> %0 } + +; FP16-LABEL: name: mask_Yk_f16 +; FP16: %[[REG1:.*]]:vr512_0_15 = COPY %1 +; FP16: %[[REG2:.*]]:vr512_0_15 = COPY %2 +; FP16: INLINEASM &"vaddph\09$3, $2, $0 {$1}", 0 /* attdialect */, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags +; CHECK-STDERR: couldn't allocate output register for constraint 'x' +define <32 x half> @mask_Yk_f16(i8 signext %msk, <32 x half> %x, <32 x half> %y) { +entry: + %0 = tail call <32 x half> asm "vaddph\09$3, $2, $0 {$1}", "=x,^Yk,x,x,~{dirflag},~{fpsr},~{flags}"(i8 %msk, <32 x half> %x, <32 x half> %y) + ret <32 x half> %0 +}