NOTE(review): this patch text was reconstructed from a copy in which all line
breaks were collapsed and every <...> token was stripped. Line breaks and blank
lines were re-derived from the hunk headers' line counts; indentation is
assumed to follow LLVM style. `<ConstantSDNode>` template arguments and
`<vscale x N x T>` IR types were restored from the surrounding code and the
function/intrinsic name suffixes. The type on the `ret %3` context line of the
AArch64 test hunk could not be recovered and is left as found -- confirm it
against the tree before applying.

diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -594,10 +594,10 @@
 
   /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
   /// of a linear sequence of unsigned values starting from 0 with a step of
-  /// IMM, where IMM must be a vector index constant positive integer value
-  /// which must fit in the vector element type.
+  /// IMM, where IMM must be a vector index constant integer value which must
+  /// fit in the vector element type.
   /// Note that IMM may be a smaller type than the vector element type, in
-  /// which case the step is implicitly zero-extended to the vector element
+  /// which case the step is implicitly sign-extended to the vector element
   /// type. IMM may also be a larger type than the vector element type, in
   /// which case the step is implicitly truncated to the vector element type.
   /// The operation does not support returning fixed-width vectors or
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3544,6 +3544,14 @@
     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
   }
 
+  // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
+  if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
+    SDValue NewStep = DAG.getConstant(-N1.getConstantOperandAPInt(0), DL,
+                                      N1.getOperand(0).getValueType());
+    return DAG.getNode(ISD::ADD, DL, VT, N0,
+                       DAG.getStepVector(DL, VT, NewStep));
+  }
+
   // Prefer an add for more folding potential and possibly better codegen:
   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -4791,7 +4791,7 @@
   EVT NOutElemVT = TLI.getTypeToTransformTo(*DAG.getContext(),
                                             NOutVT.getVectorElementType());
   APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
-  SDValue Step = DAG.getConstant(StepVal.getZExtValue(), dl, NOutElemVT);
+  SDValue Step = DAG.getConstant(StepVal.getSExtValue(), dl, NOutElemVT);
   return DAG.getStepVector(dl, NOutVT, Step);
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1655,11 +1655,10 @@
 
   // Hi = Lo + (EltCnt * Step)
   EVT EltVT = Step.getValueType();
+  APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
   SDValue StartOfHi =
-      DAG.getVScale(dl, EltVT,
-                    cast<ConstantSDNode>(Step)->getAPIntValue() *
-                        LoVT.getVectorMinNumElements());
-  StartOfHi = DAG.getZExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
+      DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
+  StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
   StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
 
   Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4717,10 +4717,9 @@
            "STEP_VECTOR can only be used with vectors of integers that are at "
            "least 8 bits wide");
     assert(isa<ConstantSDNode>(Operand) &&
-           cast<ConstantSDNode>(Operand)->getAPIntValue().isNonNegative() &&
            cast<ConstantSDNode>(Operand)->getAPIntValue().isSignedIntN(
                VT.getScalarSizeInBits()) &&
-           "Expected STEP_VECTOR integer constant to be positive and fit in "
+           "Expected STEP_VECTOR integer constant to fit in "
            "the vector element type");
     break;
   case ISD::FREEZE:
diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
--- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -259,6 +259,69 @@
   ret %3
 }
 
+define <vscale x 8 x i16> @sub_multiple_use_stepvector_nxv8i16() {
+; CHECK-LABEL: sub_multiple_use_stepvector_nxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    subr z1.h, z1.h, #2 // =0x2
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %1 = shufflevector <vscale x 8 x i16> %0, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %3 = sub <vscale x 8 x i16> %1, %2
+  %4 = shl <vscale x 8 x i16> %2, %3
+  ret <vscale x 8 x i16> %4
+}
+
+define <vscale x 8 x i16> @sub_stepvector_nxv8i16() {
+; CHECK-LABEL: sub_stepvector_nxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #2, #-1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %1 = shufflevector <vscale x 8 x i16> %0, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %3 = sub <vscale x 8 x i16> %1, %2
+  ret <vscale x 8 x i16> %3
+}
+
+define <vscale x 8 x i8> @promote_sub_stepvector_nxv8i8() {
+; CHECK-LABEL: promote_sub_stepvector_nxv8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #2, #-1
+; CHECK-NEXT:    ret
+entry:
+  %0 = insertelement <vscale x 8 x i8> poison, i8 2, i32 0
+  %1 = shufflevector <vscale x 8 x i8> %0, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %2 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  %3 = sub <vscale x 8 x i8> %1, %2
+  ret <vscale x 8 x i8> %3
+}
+
+define <vscale x 16 x i32> @split_sub_stepvector_nxv16i32() {
+; CHECK-LABEL: split_sub_stepvector_nxv16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    neg x9, x9
+; CHECK-NEXT:    index z0.s, #0, #-1
+; CHECK-NEXT:    neg x8, x8
+; CHECK-NEXT:    mov z1.s, w9
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    add z1.s, z0.s, z1.s
+; CHECK-NEXT:    add z2.s, z0.s, z3.s
+; CHECK-NEXT:    add z3.s, z1.s, z3.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %1 = sub <vscale x 16 x i32> zeroinitializer, %0
+  ret <vscale x 16 x i32> %1
+}
 
 declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
 declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN:   | FileCheck %s --check-prefixes=CHECK
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN:   | FileCheck %s --check-prefixes=CHECK
 
 declare <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
 
@@ -271,25 +271,13 @@
 declare <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
 
 define <vscale x 16 x i64> @stepvector_nxv16i64() {
-; RV32-LABEL: stepvector_nxv16i64:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
-; RV32-NEXT:    vmv.v.x v8, a0
-; RV32-NEXT:    addi a0, zero, 32
-; RV32-NEXT:    vsll.vx v8, v8, a0
-; RV32-NEXT:    vsrl.vx v16, v8, a0
-; RV32-NEXT:    vid.v v8
-; RV32-NEXT:    vadd.vv v16, v8, v16
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: stepvector_nxv16i64:
-; RV64:       # %bb.0:
-; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
-; RV64-NEXT:    vid.v v8
-; RV64-NEXT:    vadd.vx v16, v8, a0
-; RV64-NEXT:    ret
+; CHECK-LABEL: stepvector_nxv16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vadd.vx v16, v8, a0
+; CHECK-NEXT:    ret
   %v = call <vscale x 16 x i64> @llvm.experimental.stepvector.nxv16i64()
   ret <vscale x 16 x i64> %v
 }