Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -562,6 +562,10 @@ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); + // Or, direct i32 -> f16 vector conversion. Set it so custom, so the + // conversion happens in two steps: v4i32 -> v4f32 -> v4f16 + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); // AArch64 doesn't have MUL.2d: setOperationAction(ISD::MUL, MVT::v2i64, Expand); Index: test/CodeGen/AArch64/fp16-v4-instructions.ll =================================================================== --- test/CodeGen/AArch64/fp16-v4-instructions.ll +++ test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -129,3 +129,45 @@ %2 = bitcast <4 x half> %a to <4 x i16> ret <4 x i16> %2 } + + +define <4 x half> @sitofp_i32(<4 x i32> %a) { +; CHECK-LABEL: sitofp_i32: +; CHECK: scvtf [[OP1:v[0-9]+\.4s]], v0.4s +; CHECK: fcvtn v0.4h, [[OP1]] + %1 = sitofp <4 x i32> %a to <4 x half> + ret <4 x half> %1 +} + + +define <4 x half> @sitofp_i64(<4 x i64> %a) { +; CHECK-LABEL: sitofp_i64: +; CHECK: scvtf [[OP1:v[0-9]+\.2d]], v0.2d +; CHECK-DAG: scvtf [[OP2:v[0-9]+\.2d]], v1.2d +; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]] +; CHECK: fcvtn2 [[OP3]].4s, [[OP2]] +; CHECK: fcvtn v0.4h, [[OP3]].4s + %1 = sitofp <4 x i64> %a to <4 x half> + ret <4 x half> %1 +} + + +define <4 x half> @uitofp_i32(<4 x i32> %a) { +; CHECK-LABEL: uitofp_i32: +; CHECK: ucvtf [[OP1:v[0-9]+\.4s]], v0.4s +; CHECK: fcvtn v0.4h, [[OP1]] + %1 = uitofp <4 x i32> %a to <4 x half> + ret <4 x half> %1 +} + + +define <4 x half> @uitofp_i64(<4 x i64> %a) { +; CHECK-LABEL: uitofp_i64: +; CHECK: ucvtf [[OP1:v[0-9]+\.2d]], v0.2d +; CHECK-DAG: ucvtf [[OP2:v[0-9]+\.2d]], v1.2d +; CHECK-DAG: fcvtn [[OP3:v[0-9]+]].2s, [[OP1]] +; CHECK: fcvtn2 [[OP3]].4s, [[OP2]] +; CHECK: fcvtn v0.4h, [[OP3]].4s + %1 = uitofp <4 x i64> %a to <4 x half> + ret <4 x half> %1 +}