diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -990,7 +990,6 @@
     // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
     // splat of 0 or undef) once vector selects supported in SVE codegen. See
     // D68877 for more details.
-
     for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::UINT_TO_FP, VT, Custom);
@@ -1018,7 +1017,7 @@
     }
 
     for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
-      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SETCC, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -1035,6 +1034,7 @@
 
     for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                     MVT::nxv4f32, MVT::nxv2f64}) {
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
       setOperationAction(ISD::SELECT, VT, Custom);
@@ -3835,6 +3835,8 @@
     return LowerRETURNADDR(Op, DAG);
   case ISD::ADDROFRETURNADDR:
     return LowerADDROFRETURNADDR(Op, DAG);
+  case ISD::CONCAT_VECTORS:
+    return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::INSERT_VECTOR_ELT:
     return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
@@ -9150,6 +9152,18 @@
   return SDValue();
 }
 
+SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  assert(Op.getValueType().isScalableVector() &&
+         isTypeLegal(Op.getValueType()) &&
+         "Expected legal scalable vector type!");
+
+  if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
+    return Op;
+
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                       SelectionDAG &DAG) const {
   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1195,6 +1195,14 @@
   def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
             (UZP1_PPP_B $p1, $p2)>;
 
+  // Concatenate two floating point vectors.
+  def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
+            (UZP1_ZZZ_S $v1, $v2)>;
+  def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
+            (UZP1_ZZZ_H $v1, $v2)>;
+  def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
+            (UZP1_ZZZ_S $v1, $v2)>;
+
   defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
   defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
   defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
--- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@@ -95,3 +95,144 @@
   %res = fptoui %a to
   ret %res
 }
+
+; SINT_TO_FP
+
+; Split operand
+define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: scvtf_s_nxv4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z1.s, p0/m, z1.d
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: scvtf_h_nxv8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: scvtf z3.h, p0/m, z3.d
+; CHECK-NEXT: scvtf z2.h, p0/m, z2.d
+; CHECK-NEXT: scvtf z1.h, p0/m, z1.d
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT: ret
+  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+; Split result
+define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: scvtf_s_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z1.h, z0.b
+; CHECK-NEXT: sunpkhi z0.h, z0.b
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sunpklo z2.s, z1.h
+; CHECK-NEXT: sunpkhi z1.s, z1.h
+; CHECK-NEXT: sunpklo z3.s, z0.h
+; CHECK-NEXT: sunpkhi z4.s, z0.h
+; CHECK-NEXT: scvtf z0.s, p0/m, z2.s
+; CHECK-NEXT: scvtf z1.s, p0/m, z1.s
+; CHECK-NEXT: scvtf z2.s, p0/m, z3.s
+; CHECK-NEXT: scvtf z3.s, p0/m, z4.s
+; CHECK-NEXT: ret
+  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
+  ret <vscale x 16 x float> %res
+}
+
+define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: scvtf_d_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sunpklo z1.d, z0.s
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: sunpkhi z2.d, z0.s
+; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: scvtf z1.d, p0/m, z2.d
+; CHECK-NEXT: ret
+  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: scvtf_d_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: pfalse p1.b
+; CHECK-NEXT: zip1 p3.s, p0.s, p1.s
+; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT: ptrue p2.d
+; CHECK-NEXT: mov z0.d, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: scvtf z0.d, p2/m, z0.d
+; CHECK-NEXT: scvtf z1.d, p2/m, z1.d
+; CHECK-NEXT: ret
+  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+; UINT_TO_FP
+
+; Split operand
+define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: ucvtf_s_nxv4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d
+; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: ucvtf_h_nxv8i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ucvtf z3.h, p0/m, z3.d
+; CHECK-NEXT: ucvtf z2.h, p0/m, z2.d
+; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT: ret
+  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+; Split result
+define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: ucvtf_d_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpklo z1.d, z0.s
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: uunpkhi z2.d, z0.s
+; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d
+; CHECK-NEXT: ret
+  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: ucvtf_d_nxv4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: pfalse p1.b
+; CHECK-NEXT: zip1 p3.s, p0.s, p1.s
+; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT: ptrue p2.d
+; CHECK-NEXT: mov z0.d, p3/z, #1 // =0x1
+; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT: ucvtf z0.d, p2/m, z0.d
+; CHECK-NEXT: ucvtf z1.d, p2/m, z1.d
+; CHECK-NEXT: ret
+  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
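
A layout sketch of why a single uzp1 implements these concats (an illustrative
note, not part of the patch): LLVM keeps the "unpacked" SVE types nxv2f16,
nxv4f16 and nxv2f32 with each element in the low bits of a wider lane, so the
payload occupies the even-numbered lanes when the register is viewed at the
narrower element size:

    z0 (nxv2f32): [ a0, __, a1, __, ... ]   viewed as .s lanes
    z1 (nxv2f32): [ b0, __, b1, __, ... ]

    uzp1 z0.s, z0.s, z1.s   // even .s lanes of z0, then even .s lanes of z1

    z0 (nxv4f32): [ a0, a1, ..., b0, b1, ... ]

which is exactly (nxv4f32 (concat_vectors $v1, $v2)), the new UZP1_ZZZ_S
pattern above. The same reasoning gives UZP1_ZZZ_H for
nxv8f16 = concat_vectors(nxv4f16, nxv4f16), matching the final
"uzp1 z0.h, z0.h, z2.h" in the scvtf_h_nxv8i64 and ucvtf_h_nxv8i64 tests.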