// Reviewer notes, placed ahead of the first "diff --git" header. Nothing in
// the hunks below is altered.
//
// llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
//   * The unsigned counts WidenNumElts / InVTNumElts (read with
//     getVectorNumElements()) are replaced by ElementCount values
//     WidenEC / InVTEC (read with getVectorElementCount()).
//   * InWidenVT is now built from InEltVT and WidenEC.
//   * The "same element count" test becomes InVTEC == WidenEC.
//   * The two divisibility tests that used '%' now call
//     isKnownMultipleOf(<other>.getKnownMinValue()), and NumConcat is the
//     quotient of the two getKnownMinValue() results.
//   * The scalar-unroll fallback sizes Ops with WidenEC.getFixedValue();
//     MinElts on the following context line still uses getVectorNumElements().
//     NOTE(review): presumably this fallback is only meant to be reached for
//     fixed-length vectors - confirm against the ElementCount API.
//
// llvm/test/CodeGen/AArch64/sve-fcvt.ll
//   * Adds test functions whose names carry nxv3 / nxv7 for fpext, fptrunc,
//     fptosi, fptoui, sitofp and uitofp. Each expects a ptrue followed by one
//     predicated convert instruction; the *i1 variants additionally expect a
//     predicated "mov" of #-1 (sitofp) or #1 (uitofp) first.
//
// NOTE(review): this copy of the patch appears to have lost its
// angle-bracketed tokens (e.g. "SmallVector Ops(...)" carries no template
// arguments, and the IR define/ret/cast lines carry no types) as well as its
// original line breaks and indentation. It should be regenerated from the
// upstream change before anyone tries to apply it - TODO confirm.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3623,7 +3623,7 @@ SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenEC = WidenVT.getVectorElementCount(); EVT InVT = InOp.getValueType(); @@ -3643,14 +3643,14 @@ } EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); - unsigned InVTNumElts = InVT.getVectorNumElements(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC); + ElementCount InVTEC = InVT.getVectorElementCount(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) { + InVTEC = InVT.getVectorElementCount(); + if (InVTEC == WidenEC) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); @@ -3674,9 +3674,10 @@ // it an illegal type that might lead to repeatedly splitting the input // and then widening it. To avoid this, we widen the input only if // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { + if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) { // Widen the input and call convert on the widened input vector. 
- unsigned NumConcat = WidenNumElts/InVTNumElts; + unsigned NumConcat = + WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue(); SmallVector Ops(NumConcat, DAG.getUNDEF(InVT)); Ops[0] = InOp; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); @@ -3685,7 +3686,7 @@ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } - if (InVTNumElts % WidenNumElts == 0) { + if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, DAG.getVectorIdxConstant(0, DL)); // Extract the input and convert the shorten input vector. @@ -3697,7 +3698,7 @@ // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT)); // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = N->getValueType(0).getVectorNumElements(); diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -15,6 +15,16 @@ ret %res } +define @fcvts_nxv3f16( %a) { +; CHECK-LABEL: fcvts_nxv3f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fpext %a to + ret %res +} + define @fcvts_nxv4f16( %a) { ; CHECK-LABEL: fcvts_nxv4f16: ; CHECK: // %bb.0: @@ -59,6 +69,16 @@ ret %res } +define @fcvth_nxv3f32( %a) { +; CHECK-LABEL: fcvth_nxv3f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvt z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptrunc %a to + ret %res +} + define @fcvth_nxv4f32( %a) { ; CHECK-LABEL: fcvth_nxv4f32: ; CHECK: // %bb.0: @@ -143,6 +163,16 @@ ret %res } +define @fcvtzs_h_nxv7f16( %a) { +; CHECK-LABEL: fcvtzs_h_nxv7f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fcvtzs 
z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + define @fcvtzs_h_nxv8f16( %a) { ; CHECK-LABEL: fcvtzs_h_nxv8f16: ; CHECK: // %bb.0: @@ -193,6 +223,16 @@ ret %res } +define @fcvtzs_s_nxv3f16( %a) { +; CHECK-LABEL: fcvtzs_s_nxv3f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptosi %a to + ret %res +} + define @fcvtzs_s_nxv4f32( %a) { ; CHECK-LABEL: fcvtzs_s_nxv4f32: ; CHECK: // %bb.0: @@ -289,6 +329,16 @@ ret %res } +define @fcvtzu_h_nxv7f16( %a) { +; CHECK-LABEL: fcvtzu_h_nxv7f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + define @fcvtzu_h_nxv8f16( %a) { ; CHECK-LABEL: fcvtzu_h_nxv8f16: ; CHECK: // %bb.0: @@ -329,6 +379,26 @@ ret %res } +define @fcvtzu_s_nxv3f16( %a) { +; CHECK-LABEL: fcvtzu_s_nxv3f16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + +define @fcvtzu_s_nxv3f32( %a) { +; CHECK-LABEL: fcvtzu_s_nxv3f32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = fptoui %a to + ret %res +} + define @fcvtzu_s_nxv4f16( %a) { ; CHECK-LABEL: fcvtzu_s_nxv4f16: ; CHECK: // %bb.0: @@ -422,6 +492,27 @@ ret %res } +define @scvtf_h_nxv3i1( %a) { +; CHECK-LABEL: scvtf_h_nxv3i1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_h_nxv3i16( %a) { +; CHECK-LABEL: scvtf_h_nxv3i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + define @scvtf_h_nxv4i1( %a) { ; CHECK-LABEL: scvtf_h_nxv4i1: ; CHECK: // %bb.0: @@ -453,6 +544,27 @@ ret %res } +define @scvtf_h_nxv7i1( %a) { +; 
CHECK-LABEL: scvtf_h_nxv7i1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_h_nxv7i16( %a) { +; CHECK-LABEL: scvtf_h_nxv7i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + define @scvtf_h_nxv8i1( %a) { ; CHECK-LABEL: scvtf_h_nxv8i1: ; CHECK: // %bb.0: @@ -505,6 +617,27 @@ ret %res } +define @scvtf_s_nxv3i1( %a) { +; CHECK-LABEL: scvtf_s_nxv3i1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + +define @scvtf_s_nxv3i32( %a) { +; CHECK-LABEL: scvtf_s_nxv3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s +; CHECK-NEXT: ret + %res = sitofp %a to + ret %res +} + define @scvtf_s_nxv4i1( %a) { ; CHECK-LABEL: scvtf_s_nxv4i1: ; CHECK: // %bb.0: @@ -600,6 +733,37 @@ ret %res } +define @ucvtf_h_nxv3i1( %a) { +; CHECK-LABEL: ucvtf_h_nxv3i1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_h_nxv3i16( %a) { +; CHECK-LABEL: ucvtf_h_nxv3i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + +define @ucvtf_h_nxv3i32( %a) { +; CHECK-LABEL: ucvtf_h_nxv3i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s +; CHECK-NEXT: ret + %res = uitofp %a to + ret %res +} + define @ucvtf_h_nxv4i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv4i1: ; CHECK: // %bb.0: