Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4776,12 +4776,50 @@
                          DAG.getConstant(1, DL, XLenVT));
 
     // Double the VL since we halved SEW.
-    SDValue VL = getVLOperand(Op);
-    SDValue I32VL =
-        DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
-
+    SDValue AVL = getVLOperand(Op);
+    SDValue I32VL;
+
+    // Optimize for constant AVL.
+    if (isa<ConstantSDNode>(AVL)) {
+      unsigned VLMAX1;
+      unsigned VLMAX2;
+      if (VT.isFixedLengthVector()) {
+        unsigned vl = VT.getVectorElementCount().getKnownMinValue();
+        // The smallest and largest VLMAX are the same for a fixed-length
+        // vector.
+        VLMAX1 = vl;
+        VLMAX2 = vl;
+      } else {
+        unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
+        // Get the smallest VLMAX, assuming the minimum vector register size
+        // (VLEN) of 128 bits.
+        VLMAX1 = 128 / 64 * KnownSize / 64;
+        // Get the largest VLMAX, assuming the maximum vector register size
+        // (VLEN) of 65536 bits.
+        VLMAX2 = 65536 / 64 * KnownSize / 64;
+      }
+      auto *C = dyn_cast<ConstantSDNode>(AVL);
+      unsigned AVLInt = C->getZExtValue();
+      if (AVLInt <= VLMAX1) {
+        I32VL = DAG.getConstant(AVLInt << 1, DL, XLenVT);
+      } else if (AVLInt >= VLMAX2 << 1) {
+        I32VL = DAG.getConstant(VLMAX2 << 1, DL, XLenVT);
+      } else {
+        // For AVL strictly between VLMAX and 2 * VLMAX, the actual working VL
+        // depends on the hardware implementation, so fall through and let the
+        // vsetvli-based code below handle it.
+      }
+    }
+    if (!I32VL) {
+      RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
+      SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+      SDValue SEW64 = DAG.getConstant(3, DL, XLenVT);
+      SDValue SETVL =
+          DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
+      SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
+                               SEW64, LMUL);
+      I32VL =
+          DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+    }
     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
-    SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
+    SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, AVL);
 
     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
     // instructions.
@@ -4831,10 +4869,11 @@
     // TAMU
     if (Policy == RISCVII::TAIL_AGNOSTIC)
      return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
-                        VL);
+                        AVL);
    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
    // It's fine because vmerge does not care mask policy.
-    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff, VL);
+    return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
+                       AVL);
   }
 }
Index: llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll
+++ llvm/test/CodeGen/RISCV/rvv/masked-vslide1down-rv32.ll
@@ -13,7 +13,8 @@
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tumu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a2, e64, m1, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a0
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a1
@@ -34,7 +35,8 @@
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tamu_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a2, e64, m1, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a0
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a1
@@ -57,7 +59,8 @@
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tuma_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a2, e64, m1, ta, mu
+; CHECK-NEXT:    slli a3, a3, 1
 ; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a0
 ; CHECK-NEXT:    vslide1down.vx v9, v9, a1
@@ -79,6 +82,7 @@
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tama_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v8, v8, a0
@@ -98,6 +102,7 @@
 define <vscale x 1 x i64> @intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vslide1down_mask_tama_undef_mask_vx_nxv1i64_nxv1i64_i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; CHECK-NEXT:    slli a2, a2, 1
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vslide1down.vx v8, v8, a0
Index: llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
+++ llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
@@ -886,6 +886,7 @@
 define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
 ; RV32-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64:
 ; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; RV32-NEXT:    slli a2, a2, 1
 ; RV32-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
 ; RV32-NEXT:    vmv1r.v v10, v8
@@ -917,6 +918,7 @@
 define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind {
 ; RV32-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64:
 ; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli a2, a2, e64, m1, ta, mu
 ; RV32-NEXT:    slli a2, a2, 1
 ; RV32-NEXT:    vsetvli zero, a2, e32, m1, tu, mu
 ; RV32-NEXT:    vmv1r.v v10, v8
Index: llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
=================================================================== --- llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll +++ llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll @@ -856,6 +856,7 @@ define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -871,6 +872,81 @@ ret %a } +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 3, e64, m1, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 3) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 2047 +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2047) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vlen2048( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64_vlen2048: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -2048 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2048) + + ret %a +} + declare @llvm.riscv.vslide1down.mask.nxv1i64.i64( , , @@ -882,7 +958,8 @@ define @intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 @@ -909,6 +986,7 @@ define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m2, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -924,6 +1002,81 @@ ret %a } +define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl4( %0, i64 %1) nounwind { +; CHECK-LABEL: 
intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 4) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl5( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 5, e64, m2, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 5) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl4095( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl4095: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a2, a2, e64, m2, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 4095) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl4096( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64_vl4096: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 4096) + + ret %a +} + declare @llvm.riscv.vslide1down.mask.nxv2i64.i64( , , @@ -935,7 +1088,8 @@ define @intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m2, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m2, ta, mu ; CHECK-NEXT: vslide1down.vx v10, v10, a0 ; CHECK-NEXT: vslide1down.vx v10, v10, a1 @@ -962,6 +1116,7 @@ define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m4, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -977,6 +1132,81 @@ ret %a } +define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl8( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 8) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl9( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 9, e64, m4, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, 
a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 9) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl8191( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl8191: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 2 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a2, a2, e64, m4, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 8191) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl8192( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64_vl8192: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 2 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 8192) + + ret %a +} + declare @llvm.riscv.vslide1down.mask.nxv4i64.i64( , , @@ -988,7 +1218,8 @@ define @intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m4, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu ; CHECK-NEXT: vslide1down.vx v12, v12, a0 ; CHECK-NEXT: vslide1down.vx v12, v12, a1 @@ -1015,6 +1246,7 @@ define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m8, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vslide1down.vx v8, v8, a0 @@ -1030,6 +1262,82 @@ ret %a } +define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_vl16( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_vl16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 16) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_vl17( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_vl17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 17, e64, m8, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 17) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_v16383( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_v16383: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 4 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a2, a2, e64, m8, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret 
+entry: + %a = call @llvm.riscv.vslide1down.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 16383) + + ret %a +} + +define @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_vl16384( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64_vl16384: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 4 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1down.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 16384) + + ret %a +} + declare @llvm.riscv.vslide1down.mask.nxv8i64.i64( , , @@ -1041,7 +1349,8 @@ define @intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m8, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vslide1down.vx v16, v16, a0 ; CHECK-NEXT: vslide1down.vx v16, v16, a1 Index: llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll +++ llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll @@ -874,6 +874,7 @@ define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vslide1up.vx v9, v8, a1 @@ -889,6 +890,81 @@ ret %a } +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 3, e64, m1, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 3) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2047: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 2047 +; CHECK-NEXT: vsetvli a2, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2047) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64_vl2048: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -2048 +; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu +; CHECK-NEXT: vslide1up.vx v9, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v9, a0 +; CHECK-NEXT: ret +entry: + %a = call 
@llvm.riscv.vslide1up.nxv1i64.i64( + undef, + %0, + i64 %1, + i32 2048) + + ret %a +} + declare @llvm.riscv.vslide1up.mask.nxv1i64.i64( , , @@ -900,7 +976,8 @@ define @intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m1, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, mu ; CHECK-NEXT: vslide1up.vx v10, v9, a1 ; CHECK-NEXT: vslide1up.vx v9, v10, a0 @@ -927,6 +1004,7 @@ define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m2, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vslide1up.vx v10, v8, a1 @@ -942,6 +1020,81 @@ ret %a } +define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl4( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslide1up.vx v10, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v10, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 4) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl5( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 5, e64, m2, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-NEXT: vslide1up.vx v10, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v10, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 5) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl4095( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl4095: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a2, a2, e64, m2, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-NEXT: vslide1up.vx v10, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v10, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 4095) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl4096( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64_vl4096: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu +; CHECK-NEXT: vslide1up.vx v10, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v10, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv2i64.i64( + undef, + %0, + i64 %1, + i32 4096) + + ret %a +} + declare @llvm.riscv.vslide1up.mask.nxv2i64.i64( , , @@ -953,7 +1106,8 @@ define @intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m2, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m2, ta, mu ; CHECK-NEXT: vslide1up.vx v12, v10, a1 ; CHECK-NEXT: vslide1up.vx v10, v12, a0 @@ -980,6 +1134,7 @@ define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m4, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: vslide1up.vx v12, v8, a1 @@ -995,6 +1150,81 @@ ret %a } +define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl8( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vslide1up.vx v12, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v12, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 8) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl9( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 9, e64, m4, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vslide1up.vx v12, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v12, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 9) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl8191( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl8191: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 2 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a2, a2, e64, m4, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vslide1up.vx v12, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v12, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 8191) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl8192( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64_vl8192: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 2 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu +; CHECK-NEXT: vslide1up.vx v12, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v12, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv4i64.i64( + undef, + %0, + i64 %1, + i32 8192) + + ret %a +} + declare @llvm.riscv.vslide1up.mask.nxv4i64.i64( , , @@ -1006,7 +1236,8 @@ define @intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m4, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, mu ; CHECK-NEXT: vslide1up.vx v16, v12, a1 ; CHECK-NEXT: vslide1up.vx v12, v16, a0 @@ -1033,6 +1264,7 @@ define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64( %0, i64 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli a2, a2, e64, m8, ta, mu ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; CHECK-NEXT: vslide1up.vx v16, v8, a1 @@ -1048,6 +1280,82 @@ ret %a } +define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_vl16( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_vl16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 16) + + ret %a +} + 
+define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_vl17( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_vl17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli a2, 17, e64, m8, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 17) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_v16383( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_v16383: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 4 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vsetvli a2, a2, e64, m8, ta, mu +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 16383) + + ret %a +} + +define @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_vl16384( %0, i64 %1) nounwind { +; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64_vl16384: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a2, 4 +; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, mu +; CHECK-NEXT: vslide1up.vx v16, v8, a1 +; CHECK-NEXT: vslide1up.vx v8, v16, a0 +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vslide1up.nxv8i64.i64( + undef, + %0, + i64 %1, + i32 16384) + + ret %a +} + declare @llvm.riscv.vslide1up.mask.nxv8i64.i64( , , @@ -1059,7 +1367,8 @@ define @intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: vsetvli a3, a2, e64, m8, ta, mu +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, mu ; CHECK-NEXT: vslide1up.vx v24, v16, a1 ; CHECK-NEXT: vslide1up.vx v16, v24, a0