diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -98,6 +98,7 @@ case ISD::VP_ASHR: Res = PromoteIntRes_SRA(N); break; case ISD::SRL: case ISD::VP_LSHR: Res = PromoteIntRes_SRL(N); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; @@ -1325,6 +1326,7 @@ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; SDValue InOp = N->getOperand(0); + unsigned Opcode = N->getOpcode(); SDLoc dl(N); switch (getTypeAction(InOp.getValueType())) { @@ -1350,11 +1352,22 @@ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), NumElts.divideCoefficientBy(2)); - EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); - EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + if (Opcode == ISD::TRUNCATE) { + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + } + if (Opcode == ISD::VP_TRUNCATE) { + SDValue MaskLo, MaskHi, EVLLo, EVLHi; + std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); + std::tie(EVLLo, EVLHi) = + DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); + EOp1 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp1, MaskLo, EVLLo); + EOp2 = DAG.getNode(ISD::VP_TRUNCATE, dl, HalfNVT, EOp2, MaskHi, EVLHi); + } return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); } + // TODO: VP_TRUNCATE needs to be handled if necessary for some targets. case TargetLowering::TypeWidenVector: { SDValue WideInOp = GetWidenedVector(InOp); @@ -1376,7 +1389,10 @@ } // Truncate to NVT instead of VT - return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); + return Opcode == ISD::VP_TRUNCATE + ? 
DAG.getNode(Opcode, dl, NVT, Res, N->getOperand(1), + N->getOperand(2)) + : DAG.getNode(Opcode, dl, NVT, Res); } SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { @@ -1628,6 +1644,7 @@ OpNo); break; case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast(N), OpNo); break; + case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -2078,7 +2095,10 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); + return N->getOpcode() == ISD::VP_TRUNCATE + ? DAG.getNode(ISD::VP_TRUNCATE, SDLoc(N), N->getValueType(0), Op, + N->getOperand(1), N->getOperand(2)) + : DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -2,6 +2,30 @@ ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +declare <2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<2 x i16>, <2 x i1>, i32) + +define <2 x i7> @vtrunc_nxv2i7_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv2i7_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t +; CHECK-NEXT: ret + %v = call <2 x i7> @llvm.vp.trunc.nxv2i7.nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 %vl) + ret <2 x i7> %v +} + +declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i15(<2 x i15>, <2 x i1>, i32) + +define <2 x i8> @vtrunc_nxv2i8_nxv2i15(<2 x i15> %a, <2 
x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv2i8_nxv2i15: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t +; CHECK-NEXT: ret + %v = call <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i15(<2 x i15> %a, <2 x i1> %m, i32 %vl) + ret <2 x i8> %v +} + declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i16(<2 x i16>, <2 x i1>, i32) define <2 x i8> @vtrunc_nxv2i8_nxv2i16(<2 x i16> %a, <2 x i1> %m, i32 zeroext %vl) { @@ -24,6 +48,52 @@ ret <2 x i8> %v } +declare <128 x i7> @llvm.vp.trunc.nxv128i7.nxv128i16(<128 x i16>, <128 x i1>, i32) + +define <128 x i7> @vtrunc_nxv128i7_nxv128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv128i7_nxv128i16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, mu +; CHECK-NEXT: addi a2, a0, -64 +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: bltu a0, a2, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vncvt.x.x.w v8, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB4_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: li a0, 64 +; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8re8.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vncvt.x.x.w v16, v24, v0.t +; CHECK-NEXT: li a0, 128 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, mu +; CHECK-NEXT: vslideup.vx v16, v8, a1 +; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret 
+ %v = call <128 x i7> @llvm.vp.trunc.nxv128i7.nxv128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl) + ret <128 x i7> %v +} + declare <2 x i8> @llvm.vp.trunc.nxv2i8.nxv2i32(<2 x i32>, <2 x i1>, i32) define <2 x i8> @vtrunc_nxv2i8_nxv2i32(<2 x i32> %a, <2 x i1> %m, i32 zeroext %vl) { diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -2,6 +2,30 @@ ; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +declare @llvm.vp.trunc.nxv2i7.nxv2i16(, , i32) + +define @vtrunc_nxv2i7_nxv2i16( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv2i7_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.trunc.nxv2i7.nxv2i16( %a, %m, i32 %vl) + ret %v +} + +declare @llvm.vp.trunc.nxv2i8.nxv2i15(, , i32) + +define @vtrunc_nxv2i8_nxv2i15( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv2i8_nxv2i15: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v8, v8, v0.t +; CHECK-NEXT: ret + %v = call @llvm.vp.trunc.nxv2i8.nxv2i15( %a, %m, i32 %vl) + ret %v +} + declare @llvm.vp.trunc.nxv2i8.nxv2i16(, , i32) define @vtrunc_nxv2i8_nxv2i16( %a, %m, i32 zeroext %vl) { @@ -151,3 +175,39 @@ %v = call @llvm.vp.trunc.nxv2i64.nxv2i32( %a, shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer), i32 %vl) ret %v } + +declare @llvm.vp.trunc.nxv32i7.nxv32i32(, , i32) + +define @vtrunc_nxv32i7_nxv32i32( %a, %m, i32 zeroext %vl) { +; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, mu +; CHECK-NEXT: slli a1, a1, 1 +; 
CHECK-NEXT: sub a3, a0, a1 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: bltu a0, a3, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu +; CHECK-NEXT: vncvt.x.x.w v28, v16, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v18, v28, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB14_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: .LBB14_4: +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vncvt.x.x.w v20, v8, v0.t +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, mu +; CHECK-NEXT: vncvt.x.x.w v16, v20, v0.t +; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: ret + %v = call @llvm.vp.trunc.nxv32i7.nxv32i32( %a, %m, i32 %vl) + ret %v +}