diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -612,7 +612,8 @@
   SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                              int64_t ExtTrueVal) const;
-  SDValue lowerVectorMaskTrunc(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -492,7 +492,7 @@
       ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
       ISD::VP_MERGE,       ISD::VP_SELECT,      ISD::VP_FPTOSI,
       ISD::VP_FPTOUI,      ISD::VP_SETCC,       ISD::VP_SEXT,
-      ISD::VP_ZEXT};
+      ISD::VP_ZEXT,        ISD::VP_TRUNC};

   static const unsigned FloatingPointVPOps[] = {
       ISD::VP_FADD,        ISD::VP_FSUB,
@@ -579,6 +579,7 @@
       setOperationAction(ISD::VP_FPTOSI, VT, Custom);
       setOperationAction(ISD::VP_FPTOUI, VT, Custom);
+      setOperationAction(ISD::VP_TRUNC, VT, Custom);
     }

     for (MVT VT : IntVecVTs) {
@@ -859,6 +860,7 @@
       setOperationAction(ISD::VP_FPTOSI, VT, Custom);
       setOperationAction(ISD::VP_FPTOUI, VT, Custom);
       setOperationAction(ISD::VP_SETCC, VT, Custom);
+      setOperationAction(ISD::VP_TRUNC, VT, Custom);
       continue;
     }
@@ -3167,55 +3169,11 @@
     }
     return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
   }
-  case ISD::TRUNCATE: {
-    SDLoc DL(Op);
-    MVT VT = Op.getSimpleValueType();
+  case ISD::TRUNCATE:
     // Only custom-lower vector truncates
-    if (!VT.isVector())
+    if (!Op.getSimpleValueType().isVector())
       return Op;
-
-    // Truncates to mask types are handled differently
-    if (VT.getVectorElementType() == MVT::i1)
-      return lowerVectorMaskTrunc(Op, DAG);
-
-    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
-    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
-    // truncate by one power of two at a time.
-    MVT DstEltVT = VT.getVectorElementType();
-
-    SDValue Src = Op.getOperand(0);
-    MVT SrcVT = Src.getSimpleValueType();
-    MVT SrcEltVT = SrcVT.getVectorElementType();
-
-    assert(DstEltVT.bitsLT(SrcEltVT) &&
-           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
-           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
-           "Unexpected vector truncate lowering");
-
-    MVT ContainerVT = SrcVT;
-    if (SrcVT.isFixedLengthVector()) {
-      ContainerVT = getContainerForFixedLengthVector(SrcVT);
-      Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
-    }
-
-    SDValue Result = Src;
-    SDValue Mask, VL;
-    std::tie(Mask, VL) =
-        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
-    LLVMContext &Context = *DAG.getContext();
-    const ElementCount Count = ContainerVT.getVectorElementCount();
-    do {
-      SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
-      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
-      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
-                           Mask, VL);
-    } while (SrcEltVT != DstEltVT);
-
-    if (SrcVT.isFixedLengthVector())
-      Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-
-    return Result;
-  }
+    return lowerVectorTruncLike(Op, DAG);
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:
     if (Op.getOperand(0).getValueType().isVector() &&
@@ -3704,6 +3662,8 @@
     return lowerVPOp(Op, DAG,
                      Op.getOpcode() == ISD::VP_SEXT ? RISCVISD::VSEXT_VL
                                                     : RISCVISD::VZEXT_VL);
+  case ISD::VP_TRUNC:
+    return lowerVectorTruncLike(Op, DAG);
   case ISD::VP_FPTOSI:
     return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
   case ISD::VP_FPTOUI:
@@ -4356,8 +4316,9 @@
 // Custom-lower truncations from vectors to mask vectors by using a mask and a
 // setcc operation:
 //   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
-SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
-                                                  SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNC;
   SDLoc DL(Op);
   EVT MaskVT = Op.getValueType();
   // Only expect to custom-lower truncations to mask types
@@ -4365,36 +4326,108 @@
          "Unexpected type for vector mask lowering");
   SDValue Src = Op.getOperand(0);
   MVT VecVT = Src.getSimpleValueType();
-
+  SDValue Mask, VL;
+  if (IsVPTrunc) {
+    Mask = Op.getOperand(1);
+    VL = Op.getOperand(2);
+  }
   // If this is a fixed vector, we need to convert it to a scalable vector.
   MVT ContainerVT = VecVT;
+
   if (VecVT.isFixedLengthVector()) {
     ContainerVT = getContainerForFixedLengthVector(VecVT);
     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+    if (IsVPTrunc) {
+      MVT MaskContainerVT =
+          getContainerForFixedLengthVector(Mask.getSimpleValueType());
+      Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+    }
+  }
+
+  if (!IsVPTrunc) {
+    std::tie(Mask, VL) =
+        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
   }

   SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
   SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

   SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
-                         DAG.getUNDEF(ContainerVT), SplatOne);
+                         DAG.getUNDEF(ContainerVT), SplatOne, VL);
   SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
-                          DAG.getUNDEF(ContainerVT), SplatZero);
-
-  if (VecVT.isScalableVector()) {
-    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
-    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
-  }
-
-  SDValue Mask, VL;
-  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+                          DAG.getUNDEF(ContainerVT), SplatZero, VL);

   MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
   SDValue Trunc =
       DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
   Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                       DAG.getCondCode(ISD::SETNE), Mask, VL);
-  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+  if (MaskVT.isFixedLengthVector())
+    Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+  return Trunc;
+}
+
+SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNC;
+  SDLoc DL(Op);
+
+  MVT VT = Op.getSimpleValueType();
+  // Only custom-lower vector truncates
+  assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+  // Truncates to mask types are handled differently
+  if (VT.getVectorElementType() == MVT::i1)
+    return lowerVectorMaskTruncLike(Op, DAG);
+
+  // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
+  // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
+  // truncate by one power of two at a time.
+  MVT DstEltVT = VT.getVectorElementType();
+
+  SDValue Src = Op.getOperand(0);
+  MVT SrcVT = Src.getSimpleValueType();
+  MVT SrcEltVT = SrcVT.getVectorElementType();
+
+  assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
+         isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
+         "Unexpected vector truncate lowering");
+
+  MVT ContainerVT = SrcVT;
+  SDValue Mask, VL;
+  if (IsVPTrunc) {
+    Mask = Op.getOperand(1);
+    VL = Op.getOperand(2);
+  }
+  if (SrcVT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(SrcVT);
+    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+    if (IsVPTrunc) {
+      MVT MaskVT =
+          MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+    }
+  }
+
+  SDValue Result = Src;
+  if (!IsVPTrunc) {
+    std::tie(Mask, VL) =
+        getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+  }
+
+  LLVMContext &Context = *DAG.getContext();
+  const ElementCount Count = ContainerVT.getVectorElementCount();
+  do {
+    SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
+    EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
+    Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
+                         Mask, VL);
+  } while (SrcEltVT != DstEltVT);
+
+  if (SrcVT.isFixedLengthVector())
+    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+  return Result;
 }

 // Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp-mask.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+
+declare <2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<2 x i16>, <2 x i1>, i32)
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<2 x i16> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i1> %v
+}
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i16_unmasked(<2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<2 x i16> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<2 x i32>, <2 x i1>, i32)
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i1> %v
+}
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i32_unmasked(<2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<2 x i32> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i1> %v
+}
+
+declare <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i1> %v
+}
+
+define <2 x i1> @vtrunc_nxv2i1_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll
@@ -0,0 +1,151 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+
+declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16>, <2 x i1>, i32)
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i16_unmasked(<2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i8> %v
+}
+
+declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32>, <2 x i1>, i32)
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i32_unmasked(<2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i8> %v
+}
+
+declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vtrunc_nxv2i8_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<2 x i32>, <2 x i1>, i32)
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i32_unmasked(<2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<2 x i32> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i16> %v
+}
+
+declare <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vtrunc_nxv2i16_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64>, <2 x i1>, i32)
+
+define <2 x i32> @vtrunc_nxv2i32_nxv2i64(<2 x i64> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64> %a, <2 x i1> %m, i32 %vl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<2 x i64> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+  ret <2 x i32> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i16_unmasked(<vscale x 2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i16.nxv2i1(<vscale x 2 x i16> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vand.vi v10, v8, 1, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmsne.vi v8, v10, 0, v0.t
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vand.vi v8, v8, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i16_unmasked(<vscale x 2 x i16> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v10, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vtrunc_nxv2i8_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i8_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v10
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i32_unmasked(<vscale x 2 x i32> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v8
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v10, v8, v0.t
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v10, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vtrunc_nxv2i16_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i16_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v10, v8, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vtrunc_nxv2i32_nxv2i64_unmasked(<vscale x 2 x i64> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vtrunc_nxv2i32_nxv2i64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v10, v8
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i64.nxv2i32(<vscale x 2 x i64> %a, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %vl)
+  ret <vscale x 2 x i32> %v
+}
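
A minimal scalar sketch (not part of the patch) of the identity that lowerVectorMaskTruncLike relies on: truncation to i1 keeps only the least significant bit, so the patch rewrites the lane-wise truncate as an AND with 1 followed by a compare-not-equal-zero, applied under the VP mask and EVL. The function name below is hypothetical and purely illustrative.

  define i1 @trunc_to_i1_as_and_setcc(i16 %x) {
    ; same result as: trunc i16 %x to i1
    %lsb = and i16 %x, 1
    %r = icmp ne i16 %lsb, 0
    ret i1 %r
  }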