[ARM] Select VCVTB for conversions between f16 and f32/f64.

Each selection pattern is kept next to the instruction it selects:
  * The existing FP16Pat patterns (f16_to_fp / fp_to_f16, with the half value
    carried in a GPR) move up from their old location just before the
    vcvt_inst multiclass.
  * New FullFP16Pat patterns cover fpextend / fpround on HPR values.

Register-class notes for the FullFP16Pat patterns:
  * VCVTBHS and VCVTBHD both read their f16 source from an S register
    (ins SPR:$Sm), so an HPR input is copied to SPR, never to DPR.
  * VCVTBSH and VCVTBDH write an S register (outs SPR:$Sd); the f16 result is
    copied to HPR so that it has the register class used for f16 values.

Tests f2h, h2f and h2d_d2h are added to fp16-instructions.ll.

Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -675,19 +675,26 @@
 // Between half, single and double-precision.
 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
-                 [ /* intentionally left blank, see rule below */ ]>,
+                 [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
 def : FullFP16Pat<(f32 (fpextend HPR:$Sm)),
                   (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f16_to_fp GPR:$a),
+              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
 def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
-                 []>,
+                 [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
+def : FullFP16Pat<(f16 (fpround SPR:$Sm)),
+                  (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+def : FP16Pat<(fp_to_f16 SPR:$a),
+              (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
                  [/* For disassembly only; pattern left blank */]>,
@@ -703,7 +710,8 @@
 def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
                    NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
-                   []>, Requires<[HasFPARMv8, HasDPVFP]>,
+                   [/* Intentionally left blank, see patterns below */]>,
+                   Requires<[HasFPARMv8, HasDPVFP]>,
                    Sched<[WriteFPCVT]> {
   // Instruction operands.
   bits<5> Sm;
@@ -713,10 +721,16 @@
   let Inst{5} = Sm{0};
 }
 
+def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
+                  (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
+              (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
 def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
                    (outs SPR:$Sd), (ins DPR:$Dm),
                    NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
-                   []>, Requires<[HasFPARMv8, HasDPVFP]> {
+                   [/* Intentionally left blank, see patterns below */]>,
+                   Requires<[HasFPARMv8, HasDPVFP]> {
   // Instruction operands.
   bits<5> Sd;
   bits<5> Dm;
@@ -728,6 +742,11 @@
   let Inst{22} = Sd{0};
 }
 
+def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
+                  (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>;
+def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
+              (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+
 def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
                    NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
@@ -755,18 +774,6 @@
   let Inst{5} = Dm{4};
 }
 
-def : FP16Pat<(fp_to_f16 SPR:$a),
-              (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
-              (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
-
-def : FP16Pat<(f16_to_fp GPR:$a),
-              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
-def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
-              (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
 multiclass vcvt_inst<string opc, bits<2> rm,
                      SDPatternOperator node = null_frag> {
   let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {
Index: test/CodeGen/ARM/fp16-instructions.ll
===================================================================
--- test/CodeGen/ARM/fp16-instructions.ll
+++ test/CodeGen/ARM/fp16-instructions.ll
@@ -212,6 +212,44 @@
 ; CHECK-HARDFP-FULLFP16-NEXT: vcvt.f16.s32 s0, s0
 }
 
+define i32 @f2h(float %f) {
+entry:
+  %conv = fptrunc float %f to half
+  %0 = bitcast half %conv to i16
+  %tmp.0.insert.ext = zext i16 %0 to i32
+  ret i32 %tmp.0.insert.ext
+
+; CHECK-LABEL: f2h:
+; CHECK-HARDFP-FULLFP16: vcvtb.f16.f32 s0, s0
+}
+
+define float @h2f(i32 %h.coerce) {
+entry:
+  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
+  %0 = bitcast i16 %tmp.0.extract.trunc to half
+  %conv = fpext half %0 to float
+  ret float %conv
+
+; CHECK-LABEL: h2f:
+; CHECK-HARDFP-FULLFP16: vcvtb.f32.f16 s0, s0
+}
+
+define i32 @h2d_d2h(i32 %h.coerce, double %d) {
+entry:
+  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
+  %0 = bitcast i16 %tmp.0.extract.trunc to half
+  %conv = fpext half %0 to double
+  %add = fadd double %conv, %d
+  %conv2 = fptrunc double %add to half
+  %1 = bitcast half %conv2 to i16
+  %tmp3.0.insert.ext = zext i16 %1 to i32
+  ret i32 %tmp3.0.insert.ext
+
+; CHECK-LABEL: h2d_d2h:
+; CHECK-HARDFP-FULLFP16: vcvtb.f64.f16 d{{.*}}, s{{.}}
+; CHECK-HARDFP-FULLFP16: vcvtb.f16.f64 s0, d{{.*}}
+}
+
 ; TODO:
 ; 7. VCVTA
 ; 8. VCVTM