Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -651,9 +651,13 @@ // it have a FP_TO_[SU]INT instruction with a narrower destination than // source. setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); @@ -4844,12 +4848,24 @@ return DAG.UnrollVectorOp(Op.getNode()); } - assert(Op.getOperand(0).getValueType() == MVT::v4f32 && - "Invalid type for custom lowering!"); - if (VT != MVT::v4i16) + const bool HasFullFP16 = + static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16(); + + EVT NewTy; + const EVT OpTy = Op.getOperand(0).getValueType(); + if (OpTy == MVT::v4f32) + NewTy = MVT::v4i32; + else if (OpTy == MVT::v4f16 && HasFullFP16) + NewTy = MVT::v4i16; + else if (OpTy == MVT::v8f16 && HasFullFP16) + NewTy = MVT::v8i16; + else + llvm_unreachable("Invalid type for custom lowering!"); + + if (VT != MVT::v4i16 && VT != MVT::v8i16) return DAG.UnrollVectorOp(Op.getNode()); - Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); + Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } @@ -4882,9 +4898,21 @@ return DAG.UnrollVectorOp(Op.getNode()); } - assert(Op.getOperand(0).getValueType() == MVT::v4i16 && + assert((Op.getOperand(0).getValueType() == MVT::v4i16 || + Op.getOperand(0).getValueType() == MVT::v8i16) && "Invalid type for custom lowering!"); - if (VT 
!= MVT::v4f32) + + const bool HasFullFP16 = + static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16(); + + EVT DestVecType; + if (VT == MVT::v4f32) + DestVecType = MVT::v4i32; + else if (VT == MVT::v4f16 && HasFullFP16) + DestVecType = MVT::v4i16; + else if (VT == MVT::v8f16 && HasFullFP16) + DestVecType = MVT::v8i16; + else return DAG.UnrollVectorOp(Op.getNode()); unsigned CastOpc; @@ -4901,7 +4929,7 @@ break; } - Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); + Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0)); return DAG.getNode(Opc, dl, VT, Op); } Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll =================================================================== --- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll +++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll @@ -121,55 +121,77 @@ ret <8 x i16> %vcltz.i } -; FIXME (PR38404) -; -;define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) { -;entry: -; %vcvt.i = sitofp <4 x i16> %a to <4 x half> -; ret <4 x half> %vcvt.i -;} -; -;define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) { -;entry: -; %vcvt.i = sitofp <8 x i16> %a to <8 x half> -; ret <8 x half> %vcvt.i -;} +define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) { +; CHECK-LABEL: test_vcvt_f16_s16: +; CHECK: vcvt.f16.s16 d0, d0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = sitofp <4 x i16> %a to <4 x half> + ret <4 x half> %vcvt.i +} -;define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) { -;entry: -; %vcvt.i = uitofp <4 x i16> %a to <4 x half> -; ret <4 x half> %vcvt.i -;} +define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vcvtq_f16_s16: +; CHECK: vcvt.f16.s16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = sitofp <8 x i16> %a to <8 x half> + ret <8 x half> %vcvt.i +} -;define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) { -;entry: -; %vcvt.i = uitofp <8 x i16> %a to <8 x half> -; ret <8 x half> %vcvt.i -;} +define dso_local <4 x half> 
@test_vcvt_f16_u16(<4 x i16> %a) { +; CHECK-LABEL: test_vcvt_f16_u16: +; CHECK: vcvt.f16.u16 d0, d0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = uitofp <4 x i16> %a to <4 x half> + ret <4 x half> %vcvt.i +} -;define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) { -;entry: -; %vcvt.i = fptosi <4 x half> %a to <4 x i16> -; ret <4 x i16> %vcvt.i -;} +define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) { +; CHECK-LABEL: test_vcvtq_f16_u16: +; CHECK: vcvt.f16.u16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = uitofp <8 x i16> %a to <8 x half> + ret <8 x half> %vcvt.i +} -;define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) { -;entry: -; %vcvt.i = fptosi <8 x half> %a to <8 x i16> -; ret <8 x i16> %vcvt.i -;} +define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) { +; CHECK-LABEL: test_vcvt_s16_f16: +; CHECK: vcvt.s16.f16 d0, d0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = fptosi <4 x half> %a to <4 x i16> + ret <4 x i16> %vcvt.i +} -;define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) { -;entry: -; %vcvt.i = fptoui <4 x half> %a to <4 x i16> -; ret <4 x i16> %vcvt.i -;} +define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) { +; CHECK-LABEL: test_vcvtq_s16_f16: +; CHECK: vcvt.s16.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = fptosi <8 x half> %a to <8 x i16> + ret <8 x i16> %vcvt.i +} -;define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) { -;entry: -; %vcvt.i = fptoui <8 x half> %a to <8 x i16> -; ret <8 x i16> %vcvt.i -;} +define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) { +; CHECK-LABEL: test_vcvt_u16_f16: +; CHECK: vcvt.u16.f16 d0, d0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = fptoui <4 x half> %a to <4 x i16> + ret <4 x i16> %vcvt.i +} + +define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) { +; CHECK-LABEL: test_vcvtq_u16_f16: +; CHECK: vcvt.u16.f16 q0, q0 +; CHECK-NEXT: bx lr +entry: + %vcvt.i = fptoui <8 x half> %a to <8 x i16> + ret <8 x i16> %vcvt.i +} define dso_local <4 x i16> 
@test_vcvta_s16_f16(<4 x half> %a) { ; CHECK-LABEL: test_vcvta_s16_f16: