diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -60,14 +60,21 @@
   case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
   case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
   case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+  case ISD::VP_BITREVERSE:
   case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+  case ISD::VP_BSWAP:
   case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
   case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
   case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+  case ISD::VP_CTLZ_ZERO_UNDEF:
+  case ISD::VP_CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
   case ISD::PARITY:
+  case ISD::VP_CTPOP:
   case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
+  case ISD::VP_CTTZ_ZERO_UNDEF:
+  case ISD::VP_CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
   case ISD::EXTRACT_VECTOR_ELT:
@@ -283,6 +290,11 @@
     Res = PromoteIntRes_FunnelShift(N);
     break;
 
+  case ISD::VP_FSHL:
+  case ISD::VP_FSHR:
+    Res = PromoteIntRes_VPFunnelShift(N);
+    break;
+
   case ISD::IS_FPCLASS:
     Res = PromoteIntRes_IS_FPCLASS(N);
     break;
@@ -516,8 +528,15 @@
   }
 
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
-  return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
-                     DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+  SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+  if (N->getOpcode() == ISD::BSWAP)
+    return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+                       ShAmt);
+  SDValue Mask = N->getOperand(1);
+  SDValue EVL = N->getOperand(2);
+  return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+                     DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt,
+                     Mask, EVL);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -537,9 +556,15 @@
   }
 
   unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
-  return DAG.getNode(ISD::SRL, dl, NVT,
-                     DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
-                     DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+  SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+  if (N->getOpcode() == ISD::BITREVERSE)
+    return DAG.getNode(ISD::SRL, dl, NVT,
+                       DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt);
+  SDValue Mask = N->getOperand(1);
+  SDValue EVL = N->getOperand(2);
+  return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+                     DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL),
+                     ShAmt, Mask, EVL);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -584,12 +609,19 @@
 
   // Zero extend to the promoted type and do the count there.
   SDValue Op = ZExtPromotedInteger(N->getOperand(0));
-  Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
   // Subtract off the extra leading bits in the bigger type.
-  return DAG.getNode(
-      ISD::SUB, dl, NVT, Op,
-      DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
-                      NVT));
+  SDValue ExtractLeadingBits = DAG.getConstant(
+      NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
+  if (!N->isVPOpcode())
+    return DAG.getNode(ISD::SUB, dl, NVT,
+                       DAG.getNode(N->getOpcode(), dl, NVT, Op),
+                       ExtractLeadingBits);
+  SDValue Mask = N->getOperand(1);
+  SDValue EVL = N->getOperand(2);
+  return DAG.getNode(ISD::VP_SUB, dl, NVT,
+                     DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
+                     ExtractLeadingBits, Mask, EVL);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
@@ -611,7 +643,10 @@
 
   // Zero extend to the promoted type and do the count or parity there.
   SDValue Op = ZExtPromotedInteger(N->getOperand(0));
-  return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+  if (!N->isVPOpcode())
+    return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+  return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+                     N->getOperand(1), N->getOperand(2));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -635,15 +670,23 @@
     }
   }
 
-  if (N->getOpcode() == ISD::CTTZ) {
+  if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) {
     // The count is the same in the promoted type except if the original
     // value was zero. This can be handled by setting the bit just off
     // the top of the original type.
     auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
                                       OVT.getScalarSizeInBits());
-    Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+    if (N->getOpcode() == ISD::CTTZ)
+      Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+    else
+      Op =
+          DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT),
+                      N->getOperand(1), N->getOperand(2));
   }
-  return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+  if (!N->isVPOpcode())
+    return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+  return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1),
+                     N->getOperand(2));
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -1366,6 +1409,60 @@
   return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
 }
 
+// A vp version of PromoteIntRes_FunnelShift.
+SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
+  SDValue Hi = GetPromotedInteger(N->getOperand(0));
+  SDValue Lo = GetPromotedInteger(N->getOperand(1));
+  SDValue Amt = N->getOperand(2);
+  SDValue Mask = N->getOperand(3);
+  SDValue EVL = N->getOperand(4);
+  if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+    Amt = ZExtPromotedInteger(Amt);
+  EVT AmtVT = Amt.getValueType();
+
+  SDLoc DL(N);
+  EVT OldVT = N->getOperand(0).getValueType();
+  EVT VT = Lo.getValueType();
+  unsigned Opcode = N->getOpcode();
+  bool IsFSHR = Opcode == ISD::VP_FSHR;
+  unsigned OldBits = OldVT.getScalarSizeInBits();
+  unsigned NewBits = VT.getScalarSizeInBits();
+
+  // Amount has to be interpreted modulo the old bit width.
+  Amt = DAG.getNode(ISD::VP_UREM, DL, AmtVT, Amt,
+                    DAG.getConstant(OldBits, DL, AmtVT), Mask, EVL);
+
+  // If the promoted type is twice the size (or more), then we use the
+  // traditional funnel 'double' shift codegen. This isn't necessary if the
+  // shift amount is constant.
+  // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+  // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+  if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
+      !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+    SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+    Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL);
+    // FIXME: Replace it by vp operations.
+    Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+    SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL);
+    Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt,
+                      Mask, EVL);
+    if (!IsFSHR)
+      Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL);
+    return Res;
+  }
+
+  // Shift Lo up to occupy the upper bits of the promoted type.
+  SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
+  Lo = DAG.getNode(ISD::VP_SHL, DL, VT, Lo, ShiftOffset, Mask, EVL);
+
+  // Increase Amount to shift the result into the lower bits of the promoted
+  // type.
+  if (IsFSHR)
+    Amt = DAG.getNode(ISD::VP_ADD, DL, AmtVT, Amt, ShiftOffset, Mask, EVL);
+
+  return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt, Mask, EVL);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
   EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDValue Res;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -362,6 +362,7 @@
   SDValue PromoteIntRes_ABS(SDNode *N);
   SDValue PromoteIntRes_Rotate(SDNode *N);
   SDValue PromoteIntRes_FunnelShift(SDNode *N);
+  SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
   SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
 
   // Integer Operand Promotion.
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -4203,3 +4203,76 @@
   %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i16> %v
 }
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_bitreverse_nxv1i9:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
+; RV32-NEXT: vor.vv v8, v8, v9, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
+; RV32-NEXT: vor.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
+; RV32-NEXT: vor.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: lui a0, 5
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
+; RV32-NEXT: vor.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 7, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_bitreverse_nxv1i9:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
+; RV64-NEXT: vor.vv v8, v8, v9, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: lui a0, 5
+; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 7, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 7, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -1828,3 +1828,92 @@
   %v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i16> %v
 }
+
+; Test promotion.
+declare <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i48> @vp_bswap_nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_bswap_nxv1i48:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: lui a1, 1044480
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
+; RV32-NEXT: lui a3, 16
+; RV32-NEXT: addi a3, a3, -256
+; RV32-NEXT: vand.vx v10, v10, a3, v0.t
+; RV32-NEXT: vor.vv v9, v10, v9, v0.t
+; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
+; RV32-NEXT: lui a4, 4080
+; RV32-NEXT: vand.vx v10, v10, a4, v0.t
+; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t
+; RV32-NEXT: addi a5, sp, 8
+; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v12, (a5), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v11, v11, v12, v0.t
+; RV32-NEXT: vor.vv v10, v11, v10, v0.t
+; RV32-NEXT: vor.vv v9, v10, v9, v0.t
+; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
+; RV32-NEXT: vand.vx v11, v8, a3, v0.t
+; RV32-NEXT: vsll.vx v11, v11, a2, v0.t
+; RV32-NEXT: vor.vv v10, v10, v11, v0.t
+; RV32-NEXT: vand.vx v11, v8, a4, v0.t
+; RV32-NEXT: vsll.vi v11, v11, 24, v0.t
+; RV32-NEXT: vand.vv v8, v8, v12, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
+; RV32-NEXT: vor.vv v8, v11, v8, v0.t
+; RV32-NEXT: vor.vv v8, v10, v8, v0.t
+; RV32-NEXT: vor.vv v8, v8, v9, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_bswap_nxv1i48:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 4080
+; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT: vand.vx v9, v8, a1, v0.t
+; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
+; RV64-NEXT: li a0, 255
+; RV64-NEXT: slli a0, a0, 24
+; RV64-NEXT: vand.vx v10, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
+; RV64-NEXT: vor.vv v9, v9, v10, v0.t
+; RV64-NEXT: li a2, 56
+; RV64-NEXT: vsll.vx v10, v8, a2, v0.t
+; RV64-NEXT: lui a3, 16
+; RV64-NEXT: addiw a3, a3, -256
+; RV64-NEXT: vand.vx v11, v8, a3, v0.t
+; RV64-NEXT: li a4, 40
+; RV64-NEXT: vsll.vx v11, v11, a4, v0.t
+; RV64-NEXT: vor.vv v10, v10, v11, v0.t
+; RV64-NEXT: vor.vv v9, v10, v9, v0.t
+; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t
+; RV64-NEXT: vsrl.vx v11, v8, a4, v0.t
+; RV64-NEXT: vand.vx v11, v11, a3, v0.t
+; RV64-NEXT: vor.vv v10, v11, v10, v0.t
+; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
+; RV64-NEXT: vand.vx v11, v11, a1, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v11, v0.t
+; RV64-NEXT: vor.vv v8, v8, v10, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 16, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i48:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 16, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i48> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2795,3 +2795,70 @@
   %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 511
+; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: li a0, 7
+; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %v
+}
+define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 511
+; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: li a0, 7
+; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -3287,3 +3287,73 @@
   %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctpop_nxv1i9:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 511
+; RV32-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; RV32-NEXT: vand.vx v8, v8, a1
+; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: lui a0, 5
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vand.vx v9, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: li a0, 257
+; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_ctpop_nxv1i9:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 511
+; RV64-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: lui a0, 5
+; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: vand.vx v9, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: li a0, 257
+; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 511
+; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -5001,3 +5001,100 @@
   %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %v
 }
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_cttz_nxv1i9:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 512
+; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT: vor.vx v8, v8, a1, v0.t
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: vsub.vx v9, v8, a0, v0.t
+; RV32-NEXT: vnot.v v8, v8, v0.t
+; RV32-NEXT: vand.vv v8, v8, v9, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: lui a0, 5
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vand.vx v9, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: li a0, 257
+; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_cttz_nxv1i9:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 512
+; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT: vor.vx v8, v8, a1, v0.t
+; RV64-NEXT: li a0, 1
+; RV64-NEXT: vsub.vx v9, v8, a0, v0.t
+; RV64-NEXT: vnot.v v8, v8, v0.t
+; RV64-NEXT: vand.vv v8, v8, v9, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: lui a0, 5
+; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: vand.vx v9, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: li a0, 257
+; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 512
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vor.vx v8, v8, a1, v0.t
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %v
+}
+define <vscale x 1 x i9> @vp_zero_undef_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_zero_undef_cttz_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_zero_undef_cttz_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+  %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
@@ -1330,3 +1330,51 @@
   %res = call <vscale x 16 x i64> @llvm.vp.fshl.nxv16i64(<vscale x 16 x i64> %a, <vscale x 16 x i64> %b, <vscale x 16 x i64> %c, <vscale x 16 x i1> %m, i32 %evl)
   ret <vscale x 16 x i64> %res
 }
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.fshr.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @fshr_v1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fshr_v1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t
+; CHECK-NEXT: li a0, 9
+; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vadd.vi v10, v10, 7, v0.t
+; CHECK-NEXT: vand.vi v11, v10, 15, v0.t
+; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vnot.v v10, v10, v0.t
+; CHECK-NEXT: vand.vi v10, v10, 15, v0.t
+; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+  %res = call <vscale x 1 x i9> @llvm.vp.fshr.nxv1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %res
+}
+
+declare <vscale x 1 x i9> @llvm.vp.fshl.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @fshl_v1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fshl_v1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t
+; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t
+; CHECK-NEXT: li a0, 9
+; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vnot.v v11, v10, v0.t
+; CHECK-NEXT: vand.vi v11, v11, 15, v0.t
+; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t
+; CHECK-NEXT: vand.vi v10, v10, 15, v0.t
+; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+  %res = call <vscale x 1 x i9> @llvm.vp.fshl.nxv1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i9> %res
+}