Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1850,6 +1850,16 @@ // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); + unsigned NumElts = InVT.getVectorNumElements(); + + // f16 vectors are promoted to f32 before a conversion. + if (InVT.getVectorElementType() == MVT::f16) { + MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); + } if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); Index: llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -130,7 +130,6 @@ ret <4 x i16> %2 } - define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { ; CHECK-LABEL: sitofp_i8: ; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8 @@ -227,4 +226,45 @@ ret void } +define <4 x i8> @fptosi_i8(<4 x half> %a) #0 { +; CHECK-LABEL: fptosi_i8: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptosi<4 x half> %a to <4 x i8> + ret <4 x i8> %1 +} + +define <4 x i16> @fptosi_i16(<4 x half> %a) #0 { +; CHECK-LABEL: fptosi_i16: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptosi<4 x half> %a to <4 x i16> + ret <4 x i16> %1 +} + +define <4 x i8> @fptoui_i8(<4 x half> %a) #0 { +; CHECK-LABEL: fptoui_i8: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; NOTE: fcvtzs selected here because the xtn shaves the sign bit +; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptoui<4 x half> %a to <4 x i8> + ret <4 x i8> %1 +} + +define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { +; CHECK-LABEL: fptoui_i16: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptoui<4 x half> %a to <4 x i16> + ret <4 x i16> %1 +} + attributes #0 = { nounwind } Index: llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ llvm/trunk/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -367,4 +367,58 @@ ret void } +define <8 x i8> @fptosi_i8(<8 x half> %a) #0 { +; CHECK-LABEL: fptosi_i8: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: xtn v0.8b, [[I16]].8h +; CHECK-NEXT: ret + %1 = fptosi<8 x half> %a to <8 x i8> + ret <8 x i8> %1 +} + +define <8 x i16> @fptosi_i16(<8 x half> %a) #0 { +; CHECK-LABEL: fptosi_i16: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: ret + %1 = fptosi<8 x half> %a to <8 x i16> + ret <8 x i16> %1 +} + +define <8 x i8> @fptoui_i8(<8 x half> %a) #0 { +; CHECK-LABEL: fptoui_i8: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: xtn v0.8b, [[I16]].8h +; CHECK-NEXT: ret + %1 = fptoui<8 x half> %a to <8 x i8> + ret <8 x i8> %1 +} + +define <8 x i16> @fptoui_i16(<8 x half> %a) #0 { +; CHECK-LABEL: fptoui_i16: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: ret + %1 = fptoui<8 x half> %a to <8 x i16> + ret <8 x i16> %1 +} + attributes #0 = { nounwind }