Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -969,7 +969,7 @@
         if (VT.getScalarType() == MVT::i1) {
           setOperationAction(ISD::SETCC, VT, Custom);
           setOperationAction(ISD::TRUNCATE, VT, Custom);
-          setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+          setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
         }
       }
     }
@@ -984,6 +984,7 @@
 
     for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
       if (isTypeLegal(VT)) {
+        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
         setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
         setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
         setOperationAction(ISD::SELECT, VT, Custom);
@@ -3775,6 +3776,8 @@
     return LowerRETURNADDR(Op, DAG);
   case ISD::ADDROFRETURNADDR:
     return LowerADDROFRETURNADDR(Op, DAG);
+  case ISD::CONCAT_VECTORS:
+    return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
     return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
@@ -9073,6 +9076,30 @@
   return SDValue();
 }
 
+/// Custom lowering for ISD::CONCAT_VECTORS. Only legal scalable vector result
+/// types with i1 or floating-point elements are expected here (asserted).
+///
+/// \returns \p Op unchanged when it has exactly two operands, and an empty
+/// SDValue for any other operand count.
+SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  EVT EltTy = VT.getVectorElementType();
+  assert(VT.isScalableVector() && isTypeLegal(VT) &&
+         (EltTy == MVT::i1 || EltTy.isFloatingPoint()) &&
+         "Expected legal scalable vector type!");
+  // EltTy is read only by the assert above; mark it used so builds with
+  // assertions compiled out do not emit an unused-variable warning.
+  (void)EltTy;
+
+  // Two-operand concatenations are kept as they are: the concat_vectors
+  // patterns in AArch64SVEInstrInfo.td take exactly two operands.
+  if (Op.getNumOperands() == 2)
+    return Op;
+
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                       SelectionDAG &DAG) const {
   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1201,6 +1201,14 @@
   def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
             (UZP1_PPP_B $p1, $p2)>;
 
+  // Concatenate two floating point vectors.
+  def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
+            (UZP1_ZZZ_S $v1, $v2)>;
+  def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
+            (UZP1_ZZZ_H $v1, $v2)>;
+  def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
+            (UZP1_ZZZ_S $v1, $v2)>;
+
   defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
   defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
   defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
@@ -1725,6 +1733,11 @@
   def : Pat<(nxv2i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
   def : Pat<(nxv2i1 (reinterpret_cast (nxv4i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
 
+  // These allow bitcasts between unpacked_fp datatypes.
+  def : Pat<(nxv4f16 (reinterpret_cast (nxv2f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+  def : Pat<(nxv8f16 (reinterpret_cast (nxv4f16 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+  def : Pat<(nxv4f32 (reinterpret_cast (nxv2f32 ZPR:$src))), (COPY_TO_REGCLASS ZPR:$src, ZPR)>;
+
   def : Pat<(nxv16i1 (and PPR:$Ps1, PPR:$Ps2)),
             (AND_PPzPP (PTRUE_B 31), PPR:$Ps1, PPR:$Ps2)>;
   def : Pat<(nxv8i1 (and PPR:$Ps1, PPR:$Ps2)),
Index: llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
+++ llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -95,3 +95,144 @@
   %res = fptoui %a to
   ret %res
 }
+
+; SINT_TO_FP
+
+; Split operand
+define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: scvtf_s_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z1.s, p0/m, z1.d
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: scvtf_h_nxv8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z3.h, p0/m, z3.d
+; CHECK-NEXT:    scvtf z2.h, p0/m, z2.d
+; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+; Split result
+define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: scvtf_s_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z1.h, z0.b
+; CHECK-NEXT:    sunpkhi z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sunpklo z2.s, z1.h
+; CHECK-NEXT:    sunpkhi z1.s, z1.h
+; CHECK-NEXT:    sunpklo z3.s, z0.h
+; CHECK-NEXT:    sunpkhi z4.s, z0.h
+; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
+; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
+; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
+; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
+  ret <vscale x 16 x float> %res
+}
+
+define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: scvtf_d_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sunpkhi z2.d, z0.s
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: scvtf_d_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfalse p1.b
+; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
+; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ptrue p2.d
+; CHECK-NEXT:    mov z0.d, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    scvtf z0.d, p2/m, z0.d
+; CHECK-NEXT:    scvtf z1.d, p2/m, z1.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+; UINT_TO_FP
+
+; Split operand
+define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: ucvtf_s_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z1.s, p0/m, z1.d
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: ucvtf_h_nxv8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z3.h, p0/m, z3.d
+; CHECK-NEXT:    ucvtf z2.h, p0/m, z2.d
+; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+; Split result
+define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: ucvtf_d_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: ucvtf_d_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfalse p1.b
+; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
+; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ptrue p2.d
+; CHECK-NEXT:    mov z0.d, p3/z, #1 // =0x1
+; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    ucvtf z0.d, p2/m, z0.d
+; CHECK-NEXT:    ucvtf z1.d, p2/m, z1.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}