diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -377,6 +377,11 @@ return 1 << (VSEW + 3); } +inline static unsigned encodeSEW(unsigned SEW) { + assert(isValidSEW(SEW) && "Unexpected SEW value"); + return Log2_32(SEW) - 3; +} + inline static unsigned getSEW(unsigned VType) { unsigned VSEW = (VType >> 3) & 0x7; return decodeVSEW(VSEW); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -131,7 +131,7 @@ bool TailAgnostic, bool MaskAgnostic) { assert(isValidSEW(SEW) && "Invalid SEW"); unsigned VLMULBits = static_cast<unsigned>(VLMUL); - unsigned VSEWBits = Log2_32(SEW) - 3; + unsigned VSEWBits = encodeSEW(SEW); unsigned VTypeI = (VSEWBits << 3) | (VLMULBits & 0x7); if (TailAgnostic) VTypeI |= 0x40; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -317,6 +317,11 @@ }; } // namespace RISCVISD +namespace RISCV { +// We use 64 bits as the known part in the scalable vector types. +static constexpr unsigned RVVBitsPerBlock = 64; +} // namespace RISCV + class RISCVTargetLowering : public TargetLowering { const RISCVSubtarget &Subtarget; @@ -531,6 +536,15 @@ Optional<CallingConv::ID> CC) const override; static RISCVII::VLMUL getLMUL(MVT VT); + inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, + unsigned MinSize) { + // Original equation: + // VLMAX = (VectorBits / EltSize) * LMUL + // where LMUL = MinSize / RISCV::RVVBitsPerBlock + // The following equations have been reordered to prevent loss of precision + // when calculating fractional LMUL. + return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; + }; static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul); static unsigned getSubregIndexByMVT(MVT VT, unsigned Index); static unsigned getRegClassIDForVecVT(MVT VT); @@ -671,12 +685,6 @@ return false; }; }; - -namespace RISCV { -// We use 64 bits as the known part in the scalable vector types. -static constexpr unsigned RVVBitsPerBlock = 64; -} // namespace RISCV - namespace RISCVVIntrinsicsTable { struct RISCVVIntrinsicInfo { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4659,12 +4659,58 @@ DAG.getConstant(1, DL, XLenVT)); // Double the VL since we halved SEW. 
- SDValue VL = getVLOperand(Op); - SDValue I32VL = - DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); + SDValue AVL = getVLOperand(Op); + SDValue I32VL; + + // Optimize for constant AVL + if (isa<ConstantSDNode>(AVL)) { + unsigned EltSize = VT.getScalarSizeInBits(); + unsigned MinSize = VT.getSizeInBits().getKnownMinValue(); + + unsigned VectorBitsMax = Subtarget.getRealMaxVLen(); + unsigned MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); + + unsigned VectorBitsMin = Subtarget.getRealMinVLen(); + unsigned MinVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize); + + uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue(); + if (AVLInt <= MinVLMAX) { + I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT); + } else if (AVLInt >= 2 * MaxVLMAX) { + // Just set vl to VLMAX in this situation + RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT); + SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); + unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits()); + SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); + SDValue SETVLMAX = DAG.getTargetConstant( + Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32); + I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW, + LMUL); + } else { + // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl + // depends on the hardware implementation, + // so let the code below handle it. + } + } + if (!I32VL) { + RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT); + SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT); + unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits()); + SDValue SEW = DAG.getConstant(Sew, DL, XLenVT); + SDValue SETVL = + DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32); + // Use the vsetvli instruction to get the actually used length, which + // depends on the hardware implementation. + SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL, + SEW, LMUL); + I32VL = + DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT)); + } MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount()); - SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL); + SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, I32VL); // Shift the two scalar parts in using SEW=32 slide1up/slide1down // instructions. @@ -4704,10 +4750,11 @@ // TAMU if (Policy == RISCVII::TAIL_AGNOSTIC) return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, - VL); + AVL); // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma. // It's fine because vmerge does not care mask policy. 
- return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, VL); + return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, + AVL); } } @@ -5606,7 +5653,8 @@ unsigned MaxVLMAX = 0; unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits(); if (VectorBitsMax != 0) - MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock; + MaxVLMAX = + RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize); unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL; MVT IntVT = VecVT.changeVectorElementTypeToInteger(); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -206,6 +206,15 @@ return 0; } unsigned getMinVLen() const { return ZvlLen; } + unsigned getMaxVLen() const { return Zvl65536b; } + unsigned getRealMinVLen() const { + unsigned VLen = getMinRVVVectorSizeInBits(); + return VLen == 0 ? getMinVLen() : VLen; + } + unsigned getRealMaxVLen() const { + unsigned VLen = getMaxRVVVectorSizeInBits(); + return VLen == 0 ? getMaxVLen() : VLen; + } RISCVABI::ABI getTargetABI() const { return TargetABI; } bool isRegisterReservedByUser(Register i) const { assert(i < RISCV::NUM_TARGET_REGS && "Register out of range"); diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll @@ -13,7 +13,8 @@ define @intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 @@ -34,7 +35,8 @@ define @intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 @@ -57,7 +59,8 @@ define @intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 @@ -79,6 +82,7 @@ define @intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -98,6 +102,7 @@ define @intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, 
e64, m1, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll --- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll @@ -886,6 +886,7 @@ define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { ; RV32-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: ; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; RV32-NEXT: slli a2, a2, 1 ; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, mu ; RV32-NEXT: vmv1r.v v10, v8 @@ -917,6 +918,7 @@ define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, iXLen %3) nounwind { ; RV32-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: ; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; RV32-NEXT: slli a2, a2, 1 ; RV32-NEXT: vsetvli zero, a2, e32, m1, tu, mu ; RV32-NEXT: vmv1r.v v10, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-constant-vl-rv32.ll @@ -0,0 +1,286 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-128-65536 + +; RUN: llc -mtriple=riscv32 -riscv-v-vector-bits-min=512 -riscv-v-vector-bits-max=512 \ +; RUN: -mattr=+v -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-512 + +; RUN: llc -mtriple=riscv32 -riscv-v-vector-bits-min=64 -riscv-v-vector-bits-max=64 \ +; RUN: -mattr=+zve64x -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-64 + +declare @llvm.riscv.vslide1down.nxv1i64.i64( + , + , + i64, + i32) + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl1( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 1) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: 
intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 3, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 6, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 3) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 8, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 8) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 9, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli a2, 9, e64, m1, ta, mu +; CHECK-512-NEXT: slli a2, a2, 1 +; CHECK-512-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 9) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15: +; CHECK-128-65536: # %bb.0: # %entry 
+; CHECK-128-65536-NEXT: vsetivli a2, 15, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli a2, 15, e64, m1, ta, mu +; CHECK-512-NEXT: slli a2, a2, 1 +; CHECK-512-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl15: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 15) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 16, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl16: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 16) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: li a2, 2047 +; CHECK-128-65536-NEXT: vsetvli a2, a2, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-128-65536-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-512-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-64-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2047) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32, m1, 
ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2048) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll @@ -856,6 +856,7 @@ define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -882,7 +883,8 @@ define @intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 @@ -909,6 +911,7 @@ define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m2, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -935,7 +938,8 @@ define @intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m2, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m2, ta, mu ; CHECK-NEXT: vslide1down.vx v10, v10, a0 ; CHECK-NEXT: vslide1down.vx v10, v10, a1 @@ -962,6 +966,7 @@ define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m4, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -988,7 +993,8 @@ define @intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m4, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu ; CHECK-NEXT: vslide1down.vx v12, v12, a0 ; CHECK-NEXT: vslide1down.vx v12, v12, a1 @@ -1015,6 +1021,7 @@ define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m8, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -1041,7 +1048,8 @@ define @intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m8, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli 
zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vslide1down.vx v16, v16, a0 ; CHECK-NEXT: vslide1down.vx v16, v16, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-constant-vl-rv32.ll @@ -0,0 +1,286 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py + +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-128-65536 + +; RUN: llc -mtriple=riscv32 -riscv-v-vector-bits-min=512 -riscv-v-vector-bits-max=512 \ +; RUN: -mattr=+v -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-512 + +; RUN: llc -mtriple=riscv32 -riscv-v-vector-bits-min=64 -riscv-v-vector-bits-max=64 \ +; RUN: -mattr=+zve64x -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefixes=CHECK,CHECK-64 + +declare @llvm.riscv.vslide1up.nxv1i64.i64( + , + , + i64, + i32) + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl1( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 2, e32, m1, ta, mu +; CHECK-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 1) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 3, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 6, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 3) + + ret 
%a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 8, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli zero, 16, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 8) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 9, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli a2, 9, e64, m1, ta, mu +; CHECK-512-NEXT: slli a2, a2, 1 +; CHECK-512-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 9) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 15, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetivli a2, 15, e64, m1, ta, mu +; CHECK-512-NEXT: slli a2, a2, 1 +; CHECK-512-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl15: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 15) + + ret %a +} + +define 
@intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: vsetivli a2, 16, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl16: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 16) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047( %0, i64 %1) nounwind { +; CHECK-128-65536-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK-128-65536: # %bb.0: # %entry +; CHECK-128-65536-NEXT: li a2, 2047 +; CHECK-128-65536-NEXT: vsetvli a2, a2, e64, m1, ta, mu +; CHECK-128-65536-NEXT: slli a2, a2, 1 +; CHECK-128-65536-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-128-65536-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-128-65536-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-128-65536-NEXT: ret +; +; CHECK-512-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK-512: # %bb.0: # %entry +; CHECK-512-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-512-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-512-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-512-NEXT: ret +; +; CHECK-64-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-64-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-64-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-64-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2047) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, zero, e32, m1, ta, mu +; CHECK-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2048) + + ret %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll @@ -874,6 +874,7 @@ define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vslide1up.vx v9, v8, a1 @@ -900,7 +901,8 @@ define @intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli 
a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1up.vx v10, v9, a1 ; CHECK-NEXT: vslide1up.vx v9, v10, a0 @@ -927,6 +929,7 @@ define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m2, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vslide1up.vx v10, v8, a1 @@ -953,7 +956,8 @@ define @intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m2, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m2, ta, mu ; CHECK-NEXT: vslide1up.vx v12, v10, a1 ; CHECK-NEXT: vslide1up.vx v10, v12, a0 @@ -980,6 +984,7 @@ define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m4, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: vslide1up.vx v12, v8, a1 @@ -1006,7 +1011,8 @@ define @intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m4, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu ; CHECK-NEXT: vslide1up.vx v16, v12, a1 ; CHECK-NEXT: vslide1up.vx v12, v16, a0 @@ -1033,6 +1039,7 @@ define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m8, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vslide1up.vx v16, v8, a1 @@ -1059,7 +1066,8 @@ define @intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m8, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vslide1up.vx v24, v16, a1 ; CHECK-NEXT: vslide1up.vx v16, v24, a0
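
A note on the new RISCVVType::encodeSEW helper added in RISCVBaseInfo.h: vtype stores SEW as vsew = log2(SEW) - 3, so the helper is simply the inverse of the existing decodeVSEW (which returns 1 << (vsew + 3)). The standalone C++ sketch below is not part of the patch; it re-derives vsew with a loop instead of LLVM's Log2_32 so it compiles on its own, and it prints the full vtype immediate for "e32, m1, ta, mu", the vtype the SEW=32 slides in this patch run under.

#include <cassert>
#include <cstdio>

// Mirrors RISCVVType::encodeSEW: vsew = log2(SEW) - 3 for SEW in {8, 16, 32, 64, ...}.
// LLVM uses Log2_32; a loop keeps this sketch dependency-free.
static unsigned encodeSEWSketch(unsigned SEW) {
  unsigned VSEW = 0;
  while ((1u << (VSEW + 3)) < SEW)
    ++VSEW;
  assert((1u << (VSEW + 3)) == SEW && "SEW must be a power of two >= 8");
  return VSEW;
}

int main() {
  // e32 -> vsew = 2. With LMUL = m1 (vlmul = 0), tail agnostic (bit 6 set) and
  // mask undisturbed (bit 7 clear), the vtype immediate is
  // (vsew << 3) | vlmul | 0x40 = 0x50, i.e. "e32, m1, ta, mu".
  unsigned VSEW = encodeSEWSketch(32);
  unsigned VType = (VSEW << 3) | 0x0 | 0x40;
  printf("vsew = %u, vtype = 0x%x\n", VSEW, VType); // vsew = 2, vtype = 0x50
  return 0;
}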
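A note on the new RISCVTargetLowering::computeVLMAX helper: its comment says the equation was reordered to avoid losing precision for fractional LMUL, and a concrete number makes the point. For a type with MinSize = 32 and EltSize = 32 (LMUL = mf2) at VLEN = 128, computing LMUL first in integer arithmetic truncates 32/64 to 0, while multiplying first gives the correct VLMAX of 2. The sketch below is standalone, not part of the patch, and only assumes RVVBitsPerBlock = 64 as the patch does.

#include <cstdio>

static constexpr unsigned RVVBitsPerBlock = 64;

// Reordered form used by the patch: multiply first, divide last.
static unsigned computeVLMAXSketch(unsigned VectorBits, unsigned EltSize,
                                   unsigned MinSize) {
  return ((VectorBits / EltSize) * MinSize) / RVVBitsPerBlock;
}

int main() {
  unsigned VectorBits = 128, EltSize = 32, MinSize = 32; // e.g. nxv1i32, LMUL = mf2

  unsigned NaiveLMUL = MinSize / RVVBitsPerBlock;           // 32/64 == 0 (truncated)
  unsigned NaiveVLMAX = (VectorBits / EltSize) * NaiveLMUL; // 4 * 0 == 0, wrong
  unsigned VLMAX = computeVLMAXSketch(VectorBits, EltSize, MinSize); // (4*32)/64 == 2

  printf("naive = %u, reordered = %u\n", NaiveVLMAX, VLMAX); // naive = 0, reordered = 2
  return 0;
}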
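A note on the constant-AVL fast path in the i64 vslide1up/vslide1down lowering on RV32: the patch distinguishes three cases. If AVL <= MinVLMAX, the spec guarantees vl == AVL on every legal VLEN, so the doubled VL can be emitted as the constant 2 * AVL; if AVL >= 2 * MaxVLMAX, vl saturates at VLMAX, so the SEW=32 VLMAX form is enough; otherwise the chosen vl is implementation-defined and the code still emits vsetvli on the original AVL and doubles the result. The standalone sketch below reproduces only that classification; the VLEN bounds correspond to the new getRealMinVLen()/getRealMaxVLen() accessors, and the helper names here are illustrative, not the patch's code.

#include <cstdio>

static constexpr unsigned RVVBitsPerBlock = 64;

// Same reordered formula as RISCVTargetLowering::computeVLMAX in the patch.
static unsigned computeVLMAXSketch(unsigned VectorBits, unsigned EltSize,
                                   unsigned MinSize) {
  return ((VectorBits / EltSize) * MinSize) / RVVBitsPerBlock;
}

// Classify a constant AVL for an nxv1i64 vslide1up/vslide1down on RV32,
// given the VLEN bounds the subtarget guarantees.
static void classify(unsigned AVL, unsigned MinVLen, unsigned MaxVLen) {
  const unsigned EltSize = 64, MinSize = 64; // nxv1i64, LMUL = m1
  unsigned MinVLMAX = computeVLMAXSketch(MinVLen, EltSize, MinSize);
  unsigned MaxVLMAX = computeVLMAXSketch(MaxVLen, EltSize, MinSize);
  if (AVL <= MinVLMAX)
    printf("AVL=%u: vl == AVL on any VLEN, emit the constant %u\n", AVL, 2 * AVL);
  else if (AVL >= 2 * MaxVLMAX)
    printf("AVL=%u: vl saturates at VLMAX, emit the SEW=32 VLMAX form\n", AVL);
  else
    printf("AVL=%u: vl is implementation-defined, emit vsetvli + slli\n", AVL);
}

int main() {
  // Default +v configuration: VLEN is only known to lie in [128, 65536],
  // so MinVLMAX = 2 and 2 * MaxVLMAX = 2048 for nxv1i64.
  classify(2, 128, 65536);    // folds to the constant 4
  classify(2047, 128, 65536); // still needs vsetvli + slli
  classify(2048, 128, 65536); // uses the VLMAX form
  return 0;
}

These bounds line up with the new constant-vl tests: vl <= 2 folds to a constant, 3..2047 keeps the vsetvli + slli sequence, and 2048 switches to the VLMAX form; when the RUN line fixes VLEN to 64 with Zve64x, MinVLMAX and MaxVLMAX are both 1, so the VLMAX form already kicks in at vl = 2.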