Index: lib/Target/X86/X86FastISel.cpp
===================================================================
--- lib/Target/X86/X86FastISel.cpp
+++ lib/Target/X86/X86FastISel.cpp
@@ -2182,6 +2182,68 @@
   // FIXME: Handle more intrinsics.
   switch (II->getIntrinsicID()) {
   default: return false;
+  case Intrinsic::convert_from_fp16:
+  case Intrinsic::convert_to_fp16: {
+    if (TM.Options.UseSoftFloat || !Subtarget->hasF16C())
+      return false;
+
+    const Value *Op = II->getArgOperand(0);
+    unsigned InputReg = getRegForValue(Op);
+    if (InputReg == 0)
+      return false;
+
+    // F16C only converts float<->half; bail out on any other FP type.
+    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
+    if (IsFloatToHalf) {
+      if (!Op->getType()->isFloatTy())
+        return false;
+    } else {
+      if (!II->getType()->isFloatTy())
+        return false;
+    }
+
+    unsigned ResultReg = 0;
+    const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
+    if (IsFloatToHalf) {
+      // 'InputReg' is implicitly promoted from register class FR32 to
+      // register class VR128 by method 'constrainOperandRegClass' which is
+      // directly called by 'fastEmitInst_ri'.
+      // Instruction VCVTPS2PHrr takes an extra immediate operand which is
+      // used to provide rounding control (0 = round to nearest even).
+      InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
+
+      // Move the lower 32-bits of InputReg to another register of class GR32.
+      ResultReg = createResultReg(&X86::GR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(X86::VMOVPDI2DIrr), ResultReg)
+          .addReg(InputReg, RegState::Kill);
+
+      // The half value is in the lower 16-bits of ResultReg; extract them.
+      unsigned RegIdx = X86::sub_16bit;
+      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
+    } else {
+      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
+      // Explicitly sign-extend the 16-bit input to 32-bit.
+      InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
+                            /*Kill=*/false);
+
+      // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
+      InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
+                            InputReg, /*Kill=*/true);
+
+      InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
+
+      // The result value is in the lower 32-bits of InputReg.
+      // Emit an explicit copy from register class VR128 to register class FR32.
+      ResultReg = createResultReg(&X86::FR32RegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), ResultReg)
+          .addReg(InputReg, RegState::Kill);
+    }
+
+    updateValueMap(II, ResultReg);
+    return true;
+  }
   case Intrinsic::frameaddress: {
     MachineFunction *MF = FuncInfo.MF;
     if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
Index: test/CodeGen/X86/fast-isel-double-half-convertion.ll
===================================================================
--- test/CodeGen/X86/fast-isel-double-half-convertion.ll
+++ test/CodeGen/X86/fast-isel-double-half-convertion.ll
@@ -0,0 +1,23 @@
+; RUN: llc -fast-isel -fast-isel-abort -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s
+
+; XFAIL: *
+
+; In the future, we might want to teach fast-isel how to expand a double-to-half
+; conversion into a double-to-float conversion immediately followed by a
+; float-to-half conversion. For now, fast-isel is expected to fail.
+
+define double @test_fp16_to_fp64(i32 %a) {
+entry:
+  %0 = trunc i32 %a to i16
+  %1 = call double @llvm.convert.from.fp16.f64(i16 %0)
+  ret double %1
+}
+
+define i16 @test_fp64_to_fp16(double %a) {
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f64(double %a)
+  ret i16 %0
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double)
+declare double @llvm.convert.from.fp16.f64(i16)
Index: test/CodeGen/X86/fast-isel-float-half-convertion.ll
===================================================================
--- test/CodeGen/X86/fast-isel-float-half-convertion.ll
+++ test/CodeGen/X86/fast-isel-float-half-convertion.ll
@@ -0,0 +1,29 @@
+; RUN: llc -fast-isel -fast-isel-abort -asm-verbose=false -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s
+
+; Verify that fast-isel correctly expands float-half conversions.
+; -fast-isel-abort makes llc fail instead of falling back to SelectionDAG.
+
+define i16 @test_fp32_to_fp16(float %a) {
+; CHECK-LABEL: test_fp32_to_fp16:
+; CHECK: vcvtps2ph $0, %xmm0, %xmm0
+; CHECK-NEXT: vmovd %xmm0, %eax
+; CHECK-NEXT: retq
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f32(float %a)
+  ret i16 %0
+}
+
+define float @test_fp16_to_fp32(i32 %a) {
+; CHECK-LABEL: test_fp16_to_fp32:
+; CHECK: movswl %di, %eax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vcvtph2ps %xmm0, %xmm0
+; CHECK-NEXT: retq
+entry:
+  %0 = trunc i32 %a to i16
+  %1 = call float @llvm.convert.from.fp16.f32(i16 %0)
+  ret float %1
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float)
+declare float @llvm.convert.from.fp16.f32(i16)