diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -6946,6 +6946,9 @@
                         v4f32, v4i16, int_arm_neon_vcvthf2fp>,
                         Requires<[HasNEON, HasFP16]>;
 
+def : Pat<(v4f16 (fpround (v4f32 QPR:$src))), (VCVTf2h QPR:$src)>;
+def : Pat<(v4f32 (fpextend (v4f16 DPR:$src))), (VCVTh2f DPR:$src)>;
+
 // Vector Reverse.
 
 // VREV64   : Vector Reverse elements within 64-bit doublewords
diff --git a/llvm/test/CodeGen/ARM/fp16-vector-cvt.ll b/llvm/test/CodeGen/ARM/fp16-vector-cvt.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp16-vector-cvt.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple armv8a-none-none-eabihf -mattr=fullfp16 < %s | FileCheck %s
+
+define <4 x half> @fptrunc_vector_f32_f16(<4 x float> %a) {
+; CHECK-LABEL: fptrunc_vector_f32_f16:
+; CHECK:       @ %bb.0: @ %bb
+; CHECK-NEXT:    vcvt.f16.f32 d0, q0
+; CHECK-NEXT:    bx lr
+bb:
+  %z = fptrunc <4 x float> %a to <4 x half>
+  ret <4 x half> %z
+}
+
+define <4 x half> @fptrunc_vector_f64_f16(<4 x double> %a) {
+; CHECK-LABEL: fptrunc_vector_f64_f16:
+; CHECK:       @ %bb.0: @ %bb
+; CHECK-NEXT:    vcvtb.f16.f64 s0, d0
+; CHECK-NEXT:    vcvtb.f16.f64 s8, d1
+; CHECK-NEXT:    vmov r1, s0
+; CHECK-NEXT:    vcvtb.f16.f64 s2, d2
+; CHECK-NEXT:    vmov r0, s8
+; CHECK-NEXT:    vmov.16 d0[0], r1
+; CHECK-NEXT:    vmov.16 d0[1], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vcvtb.f16.f64 s2, d3
+; CHECK-NEXT:    vmov.16 d0[2], r0
+; CHECK-NEXT:    vmov r0, s2
+; CHECK-NEXT:    vmov.16 d0[3], r0
+; CHECK-NEXT:    bx lr
+bb:
+  %z = fptrunc <4 x double> %a to <4 x half>
+  ret <4 x half> %z
+}
+
+define <4 x float> @fpext_vector_f16_f32(<4 x half> %a) {
+; CHECK-LABEL: fpext_vector_f16_f32:
+; CHECK:       @ %bb.0: @ %bb
+; CHECK-NEXT:    vcvt.f32.f16 q0, d0
+; CHECK-NEXT:    bx lr
+bb:
+  %z = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %z
+}
+
+define <4 x double> @fpext_vector_f16_f64(<4 x half> %a) {
+; CHECK-LABEL: fpext_vector_f16_f64:
+; CHECK:       @ %bb.0: @ %bb
+; CHECK-NEXT:    vmovx.f16 s4, s0
+; CHECK-NEXT:    vmovx.f16 s2, s1
+; CHECK-NEXT:    vcvtb.f64.f16 d17, s4
+; CHECK-NEXT:    vcvtb.f64.f16 d3, s2
+; CHECK-NEXT:    vcvtb.f64.f16 d16, s0
+; CHECK-NEXT:    vcvtb.f64.f16 d2, s1
+; CHECK-NEXT:    vorr q0, q8, q8
+; CHECK-NEXT:    bx lr
+bb:
+  %z = fpext <4 x half> %a to <4 x double>
+  ret <4 x double> %z
+}