Index: lib/Target/X86/X86FastISel.cpp
===================================================================
--- lib/Target/X86/X86FastISel.cpp
+++ lib/Target/X86/X86FastISel.cpp
@@ -2136,6 +2136,70 @@
   // FIXME: Handle more intrinsics.
   switch (II->getIntrinsicID()) {
   default: return false;
+  case Intrinsic::convert_from_fp16:
+  case Intrinsic::convert_to_fp16: {
+    // Lower the half<->float conversion intrinsics to the F16C instructions
+    // VCVTPS2PH/VCVTPH2PS. Bail out to SelectionDAG when F16C is unavailable.
+    if (TM.Options.UseSoftFloat || !Subtarget->hasF16C())
+      return false;
+
+    const Value *Op = II->getArgOperand(0);
+    bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
+    // F16C only allows converting from float to half and from half to float.
+    // The intrinsics are overloaded on the floating-point type, so reject
+    // anything that is not a float on the floating-point side.
+    Type *FPTy = IsFloatToHalf ? Op->getType() : II->getType();
+    if (!FPTy->isFloatTy())
+      return false;
+
+    unsigned InputReg = getRegForValue(Op);
+    // getRegForValue returns 0 when the operand cannot be materialized.
+    if (InputReg == 0)
+      return false;
+
+    if (!IsFloatToHalf) {
+      assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
+      // Explicitly sign-extend the input value to 32-bit.
+      InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND,
+                            InputReg, /*Kill=*/false);
+      if (InputReg == 0)
+        return false;
+    }
+
+    // Copy to a vector (VR128) register class.
+    const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
+    unsigned ResultReg = createResultReg(RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY), ResultReg).addReg(InputReg);
+    InputReg = ResultReg;
+
+    // Now generate a VCVTPS2PHrr/VCVTPH2PSrr.
+    ResultReg = createResultReg(RC);
+    unsigned Opc = IsFloatToHalf ? X86::VCVTPS2PHrr : X86::VCVTPH2PSrr;
+    MachineInstrBuilder MIB;
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+                  ResultReg).addReg(InputReg, RegState::Kill);
+    if (IsFloatToHalf)
+      // Instruction VCVTPS2PHrr requires an extra immediate operand that
+      // provides rounding control.
+      MIB.addImm(0);
+    InputReg = ResultReg;
+
+    // Emit another copy to the register class of the scalar result.
+    RC = IsFloatToHalf ? &X86::GR32RegClass : &X86::FR32RegClass;
+    ResultReg = createResultReg(RC);
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(TargetOpcode::COPY), ResultReg);
+    MIB.addReg(InputReg, RegState::Kill);
+
+    if (IsFloatToHalf)
+      // In the case of float-to-half conversions, the half float is in
+      // the lower 16-bits of ResultReg.
+      ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, /*Kill=*/true,
+                                             X86::sub_16bit);
+    updateValueMap(II, ResultReg);
+    return true;
+  }
   case Intrinsic::frameaddress: {
     MachineFunction *MF = FuncInfo.MF;
     if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
Index: test/CodeGen/X86/fast-isel-float-half-convertion.ll
===================================================================
--- test/CodeGen/X86/fast-isel-float-half-convertion.ll
+++ test/CodeGen/X86/fast-isel-float-half-convertion.ll
@@ -0,0 +1,21 @@
+; RUN: llc -O0 -fast-isel-abort -mtriple=x86_64-unknown-unknown -mattr=+f16c < %s | FileCheck %s
+
+define i16 @test_fp32_to_fp16(float %a) {
+; CHECK-LABEL: test_fp32_to_fp16:
+; CHECK: vcvtps2ph
+entry:
+  %0 = call i16 @llvm.convert.to.fp16.f32(float %a)
+  ret i16 %0
+}
+
+define float @test_fp16_to_fp32(i16 signext %a) {
+; CHECK-LABEL: test_fp16_to_fp32:
+; CHECK: vcvtph2ps
+entry:
+  %0 = call float @llvm.convert.from.fp16.f32(i16 %a)
+  ret float %0
+}
+
+
+declare i16 @llvm.convert.to.fp16.f32(float)
+declare float @llvm.convert.from.fp16.f32(i16)