Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -984,6 +984,7 @@ for (MVT VT : MVT::fp_scalable_vector_valuetypes()) { if (isTypeLegal(VT)) { + setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); /* CONCAT_VECTORS is marked Custom for every legal FP scalable vector type visited by this loop. */ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction(ISD::SELECT, VT, Custom); @@ -3775,6 +3776,8 @@ return LowerRETURNADDR(Op, DAG); case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); + case ISD::CONCAT_VECTORS: + return LowerCONCAT_VECTORS(Op, DAG); /* Dispatches a CONCAT_VECTORS node to the lowering routine added further down in this patch. */ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: @@ -9073,6 +9076,28 @@ return SDValue(); } +SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op, + SelectionDAG &DAG) const { /* Custom lowering for a CONCAT_VECTORS node whose result is a legal scalable vector. Op is the CONCAT_VECTORS node and DAG is used to build the replacement nodes. Returns a UZP1 node of the result type, or an empty SDValue when the case is not handled here. */ + EVT VT = Op.getValueType(); + EVT OpVT = Op.getOperand(0).getValueType(); + + assert(VT.isScalableVector() && isTypeLegal(VT) && + "Expected legal scalable vector type!"); + + if (!VT.getVectorElementType().isFloatingPoint() || + VT.getVectorElementCount() != (OpVT.getVectorElementCount()*2)) + return SDValue(); /* Only floating-point results with exactly twice the element count of operand 0 are handled; anything else returns an empty SDValue. NOTE(review): presumably an empty result defers to the generic legalizer; confirm against the caller. */ + + SDLoc DL(Op); + SDValue OpLHS = Op.getOperand(0); + SDValue OpRHS = Op.getOperand(1); /* Only operands 0 and 1 are read. NOTE(review): this assumes every operand shares OpVT, so that the 2x element-count check above implies exactly two operands; confirm. */ + + OpLHS = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, OpLHS); + OpRHS = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, OpRHS); /* Both inputs are retyped to the result type VT through REINTERPRET_CAST before they are combined. */ + + return DAG.getNode(AArch64ISD::UZP1, DL, VT, OpLHS, OpRHS); /* The result is a single UZP1 of the two retyped inputs. NOTE(review): presumably this relies on the unpacked layout keeping the live elements in the even lanes of VT; confirm against the SVE unpacked-vector layout. */ +} + SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1725,6 +1725,11 @@ def : Pat<(nxv2i1 
(reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + // These allow bitcasts between unpacked_fp datatypes (nxv2f16 to nxv4f16, nxv4f16 to nxv8f16, nxv2f32 to nxv4f32); each reinterpret_cast is selected as a COPY_TO_REGCLASS on ZPR. + def : Pat<(nxv4f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>; + def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)), (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>; def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)), Index: llvm/test/CodeGen/AArch64/sve-split-fcvt.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -95,3 +95,144 @@ %res = fptoui %a to ret %res } + +; SINT_TO_FP + +; Split operand +define @scvtf_s_nxv4i64( %a) { +; CHECK-LABEL: scvtf_s_nxv4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z1.s, p0/m, z1.d +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_h_nxv8i64( %a) { +; CHECK-LABEL: scvtf_h_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z3.h, p0/m, z3.d +; CHECK-NEXT: scvtf z2.h, p0/m, z2.d +; CHECK-NEXT: scvtf z1.h, p0/m, z1.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +; Split result +define @scvtf_s_nxv16i8( %a) { +; CHECK-LABEL: scvtf_s_nxv16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpklo z1.h, z0.b +; CHECK-NEXT: sunpkhi z0.h, z0.b +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sunpklo z2.s, z1.h +; CHECK-NEXT: sunpkhi z1.s, z1.h +; CHECK-NEXT: sunpklo z3.s, z0.h +; CHECK-NEXT: sunpkhi z4.s, z0.h +; 
CHECK-NEXT: scvtf z0.s, p0/m, z2.s +; CHECK-NEXT: scvtf z1.s, p0/m, z1.s +; CHECK-NEXT: scvtf z2.s, p0/m, z3.s +; CHECK-NEXT: scvtf z3.s, p0/m, z4.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_d_nxv4i32( %a) { +; CHECK-LABEL: scvtf_d_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: sunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sunpkhi z2.d, z0.s +; CHECK-NEXT: scvtf z0.d, p0/m, z1.d +; CHECK-NEXT: scvtf z1.d, p0/m, z2.d +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_d_nxv4i1( %a) { +; CHECK-LABEL: scvtf_d_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p3.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: mov z0.d, p3/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: scvtf z0.d, p2/m, z0.d +; CHECK-NEXT: scvtf z1.d, p2/m, z1.d +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +; UINT_TO_FP + +; Split operand +define @ucvtf_s_nxv4i64( %a) { +; CHECK-LABEL: ucvtf_s_nxv4i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_h_nxv8i64( %a) { +; CHECK-LABEL: ucvtf_h_nxv8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z3.h, p0/m, z3.d +; CHECK-NEXT: ucvtf z2.h, p0/m, z2.d +; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +; Split result +define @ucvtf_d_nxv4i32( %a) { +; CHECK-LABEL: ucvtf_d_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: uunpkhi z2.d, z0.s +; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d +; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d +; 
CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_d_nxv4i1( %a) { +; CHECK-LABEL: ucvtf_d_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: pfalse p1.b +; CHECK-NEXT: zip1 p3.s, p0.s, p1.s +; CHECK-NEXT: zip2 p0.s, p0.s, p1.s +; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: mov z0.d, p3/z, #1 // =0x1 +; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ucvtf z0.d, p2/m, z0.d +; CHECK-NEXT: ucvtf z1.d, p2/m, z1.d +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +}