diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -128,10 +128,11 @@
   // and the fifth the VL.
   VSLIDEUP_VL,
   VSLIDEDOWN_VL,
-  // Matches the semantics of vslide1up. The first operand is the source
-  // vector, the second is the XLenVT scalar value. The third and fourth
+  // Matches the semantics of vslide1up/slide1down. The first operand is the
+  // source vector, the second is the XLenVT scalar value. The third and fourth
   // operands are the mask and VL operands.
   VSLIDE1UP_VL,
+  VSLIDE1DOWN_VL,
   // Matches the semantics of the vid.v instruction, with a mask and VL
   // operand.
   VID_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2952,6 +2952,70 @@
     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
                        Vec, VL);
   }
+  case Intrinsic::riscv_vslide1up:
+  case Intrinsic::riscv_vslide1down:
+  case Intrinsic::riscv_vslide1up_mask:
+  case Intrinsic::riscv_vslide1down_mask: {
+    // We need to special case these when the scalar is larger than XLen.
+    unsigned NumOps = Op.getNumOperands();
+    bool IsMasked = NumOps == 6;
+    unsigned OpOffset = IsMasked ? 1 : 0;
+    SDValue Scalar = Op.getOperand(2 + OpOffset);
+    if (Scalar.getValueType().bitsLE(XLenVT))
+      break;
+
+    // Splatting a sign extended constant is fine.
+    if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
+      if (isInt<32>(CVal->getSExtValue()))
+        break;
+
+    MVT VT = Op.getSimpleValueType();
+    assert(VT.getVectorElementType() == MVT::i64 &&
+           Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
+
+    // Convert the vector source to the equivalent nxvXi32 vector.
+    MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+    SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
+
+    SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
+                                   DAG.getConstant(0, DL, XLenVT));
+    SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
+                                   DAG.getConstant(1, DL, XLenVT));
+
+    // Double the VL since we halved SEW.
+    SDValue VL = Op.getOperand(NumOps - 1);
+    SDValue I32VL =
+        DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+
+    MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
+    SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
+
+    // Shift the two scalar parts in using SEW=32 slide1up/slide1down
+    // instructions.
+    if (IntNo == Intrinsic::riscv_vslide1up ||
+        IntNo == Intrinsic::riscv_vslide1up_mask) {
+      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
+                        I32Mask, I32VL);
+      Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
+                        I32Mask, I32VL);
+    } else {
+      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
+                        I32Mask, I32VL);
+      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
+                        I32Mask, I32VL);
+    }
+
+    // Convert back to nxvXi64.
+    Vec = DAG.getBitcast(VT, Vec);
+
+    if (!IsMasked)
+      return Vec;
+
+    // Apply mask after the operation.
+    SDValue Mask = Op.getOperand(NumOps - 2);
+    SDValue MaskedOff = Op.getOperand(1);
+    return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
+  }
   }
 
   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
@@ -6977,6 +7041,7 @@
   NODE_NAME_CASE(VSLIDEUP_VL)
   NODE_NAME_CASE(VSLIDE1UP_VL)
   NODE_NAME_CASE(VSLIDEDOWN_VL)
+  NODE_NAME_CASE(VSLIDE1DOWN_VL)
   NODE_NAME_CASE(VID_VL)
   NODE_NAME_CASE(VFNCVT_ROD_VL)
   NODE_NAME_CASE(VECREDUCE_ADD_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1144,6 +1144,7 @@
 def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
 def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
 def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
+def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
 
 let Predicates = [HasStdExtV] in {
 
@@ -1157,6 +1158,11 @@
                                 (XLenVT (VLOp GPR:$vl)))),
             (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
                 vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
+  def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rs1),
+                                             GPR:$rs2, (vti.Mask true_mask),
+                                             (XLenVT (VLOp GPR:$vl)))),
+            (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
+                vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.SEW)>;
 }
 
 foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
@@ -792,3 +792,203 @@
 
   ret %a
 }
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64.i64(
+  <vscale x 1 x i64>,
+  i64,
+  i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv1i64_nxv1i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vslide1down.vx v25, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v25, a1
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vslide1down.nxv1i64.i64(
+    <vscale x 1 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1down.mask.nxv1i64.i64(
+  <vscale x 1 x i64>,
+  <vscale x 1 x i64>,
+  i64,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv1i64_nxv1i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m1,ta,mu
+; CHECK-NEXT:    vslide1down.vx v25, v9, a0
+; CHECK-NEXT:    vslide1down.vx v25, v25, a1
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v25, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vslide1down.mask.nxv1i64.i64(
+    <vscale x 1 x i64> %0,
+    <vscale x 1 x i64> %1,
+    i64 %2,
+    <vscale x 1 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1down.nxv2i64.i64(
+  <vscale x 2 x i64>,
+  i64,
+  i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv2i64_nxv2i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; CHECK-NEXT:    vslide1down.vx v26, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v26, a1
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vslide1down.nxv2i64.i64(
+    <vscale x 2 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1down.mask.nxv2i64.i64(
+  <vscale x 2 x i64>,
+  <vscale x 2 x i64>,
+  i64,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv2i64_nxv2i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m2,ta,mu
+; CHECK-NEXT:    vslide1down.vx v26, v10, a0
+; CHECK-NEXT:    vslide1down.vx v26, v26, a1
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vslide1down.mask.nxv2i64.i64(
+    <vscale x 2 x i64> %0,
+    <vscale x 2 x i64> %1,
+    i64 %2,
+    <vscale x 2 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1down.nxv4i64.i64(
+  <vscale x 4 x i64>,
+  i64,
+  i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv4i64_nxv4i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m4,ta,mu
+; CHECK-NEXT:    vslide1down.vx v28, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v28, a1
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vslide1down.nxv4i64.i64(
+    <vscale x 4 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1down.mask.nxv4i64.i64(
+  <vscale x 4 x i64>,
+  <vscale x 4 x i64>,
+  i64,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv4i64_nxv4i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m4,ta,mu
+; CHECK-NEXT:    vslide1down.vx v28, v12, a0
+; CHECK-NEXT:    vslide1down.vx v28, v28, a1
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vslide1down.mask.nxv4i64.i64(
+    <vscale x 4 x i64> %0,
+    <vscale x 4 x i64> %1,
+    i64 %2,
+    <vscale x 4 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1down.nxv8i64.i64(
+  <vscale x 8 x i64>,
+  i64,
+  i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_vx_nxv8i64_nxv8i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m8,ta,mu
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vslide1down.nxv8i64.i64(
+    <vscale x 8 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1down.mask.nxv8i64.i64(
+  <vscale x 8 x i64>,
+  <vscale x 8 x i64>,
+  i64,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1down_mask_vx_nxv8i64_nxv8i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m8,ta,mu
+; CHECK-NEXT:    vslide1down.vx v16, v16, a0
+; CHECK-NEXT:    vslide1down.vx v16, v16, a1
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vslide1down.mask.nxv8i64.i64(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x i64> %1,
+    i64 %2,
+    <vscale x 8 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll
@@ -810,3 +810,203 @@
 
   ret %a
 }
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1up.nxv1i64.i64(
+  <vscale x 1 x i64>,
+  i64,
+  i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv1i64_nxv1i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m1,ta,mu
+; CHECK-NEXT:    vslide1up.vx v25, v8, a1
+; CHECK-NEXT:    vslide1up.vx v8, v25, a0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vslide1up.nxv1i64.i64(
+    <vscale x 1 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 1 x i64> @llvm.riscv.vslide1up.mask.nxv1i64.i64(
+  <vscale x 1 x i64>,
+  <vscale x 1 x i64>,
+  i64,
+  <vscale x 1 x i1>,
+  i32);
+
+define <vscale x 1 x i64> @intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv1i64_nxv1i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m1,ta,mu
+; CHECK-NEXT:    vslide1up.vx v25, v9, a1
+; CHECK-NEXT:    vslide1up.vx v26, v25, a0
+; CHECK-NEXT:    vsetvli a0, a2, e64,m1,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v26, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 1 x i64> @llvm.riscv.vslide1up.mask.nxv1i64.i64(
+    <vscale x 1 x i64> %0,
+    <vscale x 1 x i64> %1,
+    i64 %2,
+    <vscale x 1 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 1 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1up.nxv2i64.i64(
+  <vscale x 2 x i64>,
+  i64,
+  i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv2i64_nxv2i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m2,ta,mu
+; CHECK-NEXT:    vslide1up.vx v26, v8, a1
+; CHECK-NEXT:    vslide1up.vx v8, v26, a0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vslide1up.nxv2i64.i64(
+    <vscale x 2 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 2 x i64> @llvm.riscv.vslide1up.mask.nxv2i64.i64(
+  <vscale x 2 x i64>,
+  <vscale x 2 x i64>,
+  i64,
+  <vscale x 2 x i1>,
+  i32);
+
+define <vscale x 2 x i64> @intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv2i64_nxv2i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m2,ta,mu
+; CHECK-NEXT:    vslide1up.vx v26, v10, a1
+; CHECK-NEXT:    vslide1up.vx v28, v26, a0
+; CHECK-NEXT:    vsetvli a0, a2, e64,m2,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v28, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 2 x i64> @llvm.riscv.vslide1up.mask.nxv2i64.i64(
+    <vscale x 2 x i64> %0,
+    <vscale x 2 x i64> %1,
+    i64 %2,
+    <vscale x 2 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 2 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1up.nxv4i64.i64(
+  <vscale x 4 x i64>,
+  i64,
+  i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv4i64_nxv4i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m4,ta,mu
+; CHECK-NEXT:    vslide1up.vx v28, v8, a1
+; CHECK-NEXT:    vslide1up.vx v8, v28, a0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vslide1up.nxv4i64.i64(
+    <vscale x 4 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 4 x i64> @llvm.riscv.vslide1up.mask.nxv4i64.i64(
+  <vscale x 4 x i64>,
+  <vscale x 4 x i64>,
+  i64,
+  <vscale x 4 x i1>,
+  i32);
+
+define <vscale x 4 x i64> @intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv4i64_nxv4i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m4,ta,mu
+; CHECK-NEXT:    vslide1up.vx v28, v12, a1
+; CHECK-NEXT:    vslide1up.vx v12, v28, a0
+; CHECK-NEXT:    vsetvli a0, a2, e64,m4,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 4 x i64> @llvm.riscv.vslide1up.mask.nxv4i64.i64(
+    <vscale x 4 x i64> %0,
+    <vscale x 4 x i64> %1,
+    i64 %2,
+    <vscale x 4 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 4 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1up.nxv8i64.i64(
+  <vscale x 8 x i64>,
+  i64,
+  i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, i64 %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_vx_nxv8i64_nxv8i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    vsetvli a2, a2, e32,m8,ta,mu
+; CHECK-NEXT:    vslide1up.vx v16, v8, a1
+; CHECK-NEXT:    vslide1up.vx v8, v16, a0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vslide1up.nxv8i64.i64(
+    <vscale x 8 x i64> %0,
+    i64 %1,
+    i32 %2)
+
+  ret <vscale x 8 x i64> %a
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.vslide1up.mask.nxv8i64.i64(
+  <vscale x 8 x i64>,
+  <vscale x 8 x i64>,
+  i64,
+  <vscale x 8 x i1>,
+  i32);
+
+define <vscale x 8 x i64> @intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, i64 %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    vsetvli a3, a3, e32,m8,ta,mu
+; CHECK-NEXT:    vslide1up.vx v24, v16, a1
+; CHECK-NEXT:    vslide1up.vx v16, v24, a0
+; CHECK-NEXT:    vsetvli a0, a2, e64,m8,ta,mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    jalr zero, 0(ra)
+entry:
+  %a = call <vscale x 8 x i64> @llvm.riscv.vslide1up.mask.nxv8i64.i64(
+    <vscale x 8 x i64> %0,
+    <vscale x 8 x i64> %1,
+    i64 %2,
+    <vscale x 8 x i1> %3,
+    i32 %4)
+
+  ret <vscale x 8 x i64> %a
+}
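
Note, not part of the patch: the ordering of the two SEW=32 slides in the lowering (high half first for slide1up, low half first for slide1down) can look backwards at first. Below is a minimal host-side C++ sketch of why it works. It assumes only what the lowering itself relies on, namely that the nxvXi64-to-nxv(2X)i32 bitcast maps i32 elements 2*i and 2*i+1 to the low and high halves of i64 element i; the slide1up helper, the 4-element vector, and the sample constant are made up for illustration.

// Reference model of vslide1up.vx at SEW=32: every element moves up one
// position and the scalar lands in element 0. (Host-side sketch only.)
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint32_t> slide1up(std::vector<uint32_t> V, uint32_t X) {
  for (size_t I = V.size() - 1; I > 0; --I)
    V[I] = V[I - 1];
  V[0] = X;
  return V;
}

int main() {
  // Hypothetical i64 scalar and a two-element i64 vector viewed as four i32s
  // (element 2*i is the low half of i64 element i, element 2*i+1 the high half).
  uint64_t Scalar = 0x1122334455667788ULL;
  uint32_t Lo = static_cast<uint32_t>(Scalar);
  uint32_t Hi = static_cast<uint32_t>(Scalar >> 32);
  std::vector<uint32_t> Vec = {0xA0, 0xA1, 0xB0, 0xB1};

  // Same order as the lowering: slide in Hi first, then Lo, so that Lo ends
  // up in the lower-numbered i32 slot of i64 element 0.
  Vec = slide1up(Vec, Hi);
  Vec = slide1up(Vec, Lo);

  // Reinterpreted at SEW=64, element 0 is the original scalar and element 1
  // is the old element 0 -- exactly vslide1up semantics at SEW=64.
  uint64_t Elt0 = (static_cast<uint64_t>(Vec[1]) << 32) | Vec[0];
  printf("element 0 = 0x%016llx\n", (unsigned long long)Elt0); // 0x1122334455667788

  // For vslide1down the order flips (Lo first, then Hi) because the scalar
  // is inserted at the high end of the vector rather than at element 0.
  return 0;
}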