diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3793,15 +3793,32 @@
   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
 }
 
-// Implement step_vector to the vid instruction.
+// Lower step_vector to the vid instruction. Any non-identity step value must
+// be accounted for by manual expansion.
 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDLoc DL(Op);
-  assert(Op.getConstantOperandAPInt(0) == 1 && "Unexpected step value");
   MVT VT = Op.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
   SDValue Mask, VL;
   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
-  return DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
+  SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
+  uint64_t StepValImm = Op.getConstantOperandVal(0);
+  if (StepValImm != 1) {
+    assert(Op.getOperand(0).getValueType() == XLenVT &&
+           "Unexpected step value type");
+    if (isPowerOf2_64(StepValImm)) {
+      SDValue StepVal =
+          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
+                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
+      StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
+    } else {
+      SDValue StepVal =
+          DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Op.getOperand(0));
+      StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
+    }
+  }
+  return StepVec;
 }
 
 // Implement vector_reverse using vrgather.vv with indices determined by
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -52,6 +52,51 @@
   ret <vscale x 8 x i8> %v
 }
 
+define <vscale x 8 x i8> @add_stepvector_nxv8i8() {
+; CHECK-LABEL: add_stepvector_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vsll.vi v8, v25, 1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %1 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %2 = add <vscale x 8 x i8> %0, %1
+  ret <vscale x 8 x i8> %2
+}
+
+define <vscale x 8 x i8> @mul_stepvector_nxv8i8() {
+; CHECK-LABEL: mul_stepvector_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    addi a0, zero, 3
+; CHECK-NEXT:    vmul.vx v8, v25, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i8> poison, i8 3, i32 0
+  %1 = shufflevector <vscale x 8 x i8> %0, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %3 = mul <vscale x 8 x i8> %2, %1
+  ret <vscale x 8 x i8> %3
+}
+
+define <vscale x 8 x i8> @shl_stepvector_nxv8i8() {
+; CHECK-LABEL: shl_stepvector_nxv8i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
+; CHECK-NEXT:    vid.v v25
+; CHECK-NEXT:    vsll.vi v8, v25, 2
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i8> poison, i8 2, i32 0
+  %1 = shufflevector <vscale x 8 x i8> %0, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %3 = shl <vscale x 8 x i8> %2, %1
+  ret <vscale x 8 x i8> %3
+}
+
 declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
 
 define <vscale x 16 x i8> @stepvector_nxv16i8() {
@@ -148,6 +193,51 @@
   ret <vscale x 16 x i16> %v
 }
 
+define <vscale x 16 x i16> @add_stepvector_nxv16i16() {
+; CHECK-LABEL: add_stepvector_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vsll.vi v8, v28, 1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i16> @llvm.experimental.stepvector.nxv16i16()
+  %1 = call <vscale x 16 x i16> @llvm.experimental.stepvector.nxv16i16()
+  %2 = add <vscale x 16 x i16> %0, %1
+  ret <vscale x 16 x i16> %2
+}
+
+define <vscale x 16 x i16> @mul_stepvector_nxv16i16() {
+; CHECK-LABEL: mul_stepvector_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    addi a0, zero, 3
+; CHECK-NEXT:    vmul.vx v8, v28, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 16 x i16> poison, i16 3, i32 0
+  %1 = shufflevector <vscale x 16 x i16> %0, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %2 = call <vscale x 16 x i16> @llvm.experimental.stepvector.nxv16i16()
+  %3 = mul <vscale x 16 x i16> %2, %1
+  ret <vscale x 16 x i16> %3
+}
+
+define <vscale x 16 x i16> @shl_stepvector_nxv16i16() {
+; CHECK-LABEL: shl_stepvector_nxv16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT:    vid.v v28
+; CHECK-NEXT:    vsll.vi v8, v28, 2
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 16 x i16> poison, i16 2, i32 0
+  %1 = shufflevector <vscale x 16 x i16> %0, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %2 = call <vscale x 16 x i16> @llvm.experimental.stepvector.nxv16i16()
+  %3 = shl <vscale x 16 x i16> %2, %1
+  ret <vscale x 16 x i16> %3
+}
+
 declare <vscale x 32 x i16> @llvm.experimental.stepvector.nxv32i16()
 
 define <vscale x 32 x i16> @stepvector_nxv32i16() {
@@ -220,6 +310,51 @@
   ret <vscale x 16 x i32> %v
 }
 
+define <vscale x 16 x i32> @add_stepvector_nxv16i32() {
+; CHECK-LABEL: add_stepvector_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %1 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %2 = add <vscale x 16 x i32> %0, %1
+  ret <vscale x 16 x i32> %2
+}
+
+define <vscale x 16 x i32> @mul_stepvector_nxv16i32() {
+; CHECK-LABEL: mul_stepvector_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    addi a0, zero, 3
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 16 x i32> poison, i32 3, i32 0
+  %1 = shufflevector <vscale x 16 x i32> %0, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %3 = mul <vscale x 16 x i32> %2, %1
+  ret <vscale x 16 x i32> %3
+}
+
+define <vscale x 16 x i32> @shl_stepvector_nxv16i32() {
+; CHECK-LABEL: shl_stepvector_nxv16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 16 x i32> poison, i32 2, i32 0
+  %1 = shufflevector <vscale x 16 x i32> %0, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %3 = shl <vscale x 16 x i32> %2, %1
+  ret <vscale x 16 x i32> %3
+}
+
 declare <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
 
 define <vscale x 1 x i64> @stepvector_nxv1i64() {
@@ -268,6 +403,51 @@
   ret <vscale x 8 x i64> %v
 }
 
+define <vscale x 8 x i64> @add_stepvector_nxv8i64() {
+; CHECK-LABEL: add_stepvector_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+  %1 = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+  %2 = add <vscale x 8 x i64> %0, %1
+  ret <vscale x 8 x i64> %2
+}
+
+define <vscale x 8 x i64> @mul_stepvector_nxv8i64() {
+; CHECK-LABEL: mul_stepvector_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    addi a0, zero, 3
+; CHECK-NEXT:    vmul.vx v8, v8, a0
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i64> poison, i64 3, i32 0
+  %1 = shufflevector <vscale x 8 x i64> %0, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+  %3 = mul <vscale x 8 x i64> %2, %1
+  ret <vscale x 8 x i64> %3
+}
+
+define <vscale x 8 x i64> @shl_stepvector_nxv8i64() {
+; CHECK-LABEL: shl_stepvector_nxv8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vsll.vi v8, v8, 2
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i64> poison, i64 2, i32 0
+  %1 = shufflevector <vscale x 8 x i64> %0, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+  %3 = shl <vscale x 8 x i64> %2, %1
+  ret <vscale x 8 x i64> %3
+}
+
 declare <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
 
 define <vscale x 16 x i64> @stepvector_nxv16i64() {
@@ -281,3 +461,104 @@
   %v = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
   ret <vscale x 16 x i64> %v
 }
+
+define <vscale x 16 x i64> @add_stepvector_nxv16i64() {
+; RV32-LABEL: add_stepvector_nxv16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 1
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    addi a0, zero, 32
+; RV32-NEXT:    vsll.vx v8, v8, a0
+; RV32-NEXT:    vsrl.vx v16, v8, a0
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vsll.vi v8, v8, 1
+; RV32-NEXT:    vadd.vv v16, v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: add_stepvector_nxv16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 1
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    vsll.vi v8, v8, 1
+; RV64-NEXT:    vadd.vx v16, v8, a0
+; RV64-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
+  %1 = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
+  %2 = add <vscale x 16 x i64> %0, %1
+  ret <vscale x 16 x i64> %2
+}
+
+define <vscale x 16 x i64> @mul_stepvector_nxv16i64() {
+; RV32-LABEL: mul_stepvector_nxv16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    addi a0, zero, 3
+; RV32-NEXT:    vmul.vx v8, v8, a0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    srli a0, a0, 3
+; RV32-NEXT:    addi a1, zero, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    vmv.v.x v16, a0
+; RV32-NEXT:    addi a0, zero, 32
+; RV32-NEXT:    vsll.vx v16, v16, a0
+; RV32-NEXT:    vsrl.vx v16, v16, a0
+; RV32-NEXT:    vadd.vv v16, v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mul_stepvector_nxv16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    addi a0, zero, 3
+; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    srli a0, a0, 3
+; RV64-NEXT:    addi a1, zero, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    vadd.vx v16, v8, a0
+; RV64-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 16 x i64> poison, i64 3, i32 0
+  %1 = shufflevector <vscale x 16 x i64> %0, <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+  %2 = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
+  %3 = mul <vscale x 16 x i64> %2, %1
+  ret <vscale x 16 x i64> %3
+}
+
+define <vscale x 16 x i64> @shl_stepvector_nxv16i64() {
+; RV32-LABEL: shl_stepvector_nxv16i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 2
+; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV32-NEXT:    vmv.v.x v8, a0
+; RV32-NEXT:    addi a0, zero, 32
+; RV32-NEXT:    vsll.vx v8, v8, a0
+; RV32-NEXT:    vsrl.vx v16, v8, a0
+; RV32-NEXT:    vid.v v8
+; RV32-NEXT:    vsll.vi v8, v8, 2
+; RV32-NEXT:    vadd.vv v16, v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: shl_stepvector_nxv16i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 2
+; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; RV64-NEXT:    vid.v v8
+; RV64-NEXT:    vsll.vi v8, v8, 2
+; RV64-NEXT:    vadd.vx v16, v8, a0
+; RV64-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 16 x i64> poison, i64 2, i32 0
+  %1 = shufflevector <vscale x 16 x i64> %0, <vscale x 16 x i64> poison, <vscale x 16 x i32> zeroinitializer
+  %2 = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
+  %3 = shl <vscale x 16 x i64> %2, %1
+  ret <vscale x 16 x i64> %3
+}