diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4742,41 +4742,37 @@
   }
 
   // Constant fold unary operations with a vector integer or float operand.
-  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) {
-    if (BV->isConstant()) {
-      switch (Opcode) {
-      default:
-        // FIXME: Entirely reasonable to perform folding of other unary
-        // operations here as the need arises.
-        break;
-      case ISD::FNEG:
-      case ISD::FABS:
-      case ISD::FCEIL:
-      case ISD::FTRUNC:
-      case ISD::FFLOOR:
-      case ISD::FP_EXTEND:
-      case ISD::FP_TO_SINT:
-      case ISD::FP_TO_UINT:
-      case ISD::TRUNCATE:
-      case ISD::ANY_EXTEND:
-      case ISD::ZERO_EXTEND:
-      case ISD::SIGN_EXTEND:
-      case ISD::UINT_TO_FP:
-      case ISD::SINT_TO_FP:
-      case ISD::ABS:
-      case ISD::BITREVERSE:
-      case ISD::BSWAP:
-      case ISD::CTLZ:
-      case ISD::CTLZ_ZERO_UNDEF:
-      case ISD::CTTZ:
-      case ISD::CTTZ_ZERO_UNDEF:
-      case ISD::CTPOP: {
-        SDValue Ops = { Operand };
-        if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
-          return Fold;
-      }
-      }
-    }
+  switch (Opcode) {
+  default:
+    // FIXME: Entirely reasonable to perform folding of other unary
+    // operations here as the need arises.
+    break;
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FFLOOR:
+  case ISD::FP_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::TRUNCATE:
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+  case ISD::ABS:
+  case ISD::BITREVERSE:
+  case ISD::BSWAP:
+  case ISD::CTLZ:
+  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTTZ:
+  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTPOP: {
+    SDValue Ops = {Operand};
+    if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+      return Fold;
+  }
   }
 
   unsigned OpOpcode = Operand.getNode()->getOpcode();
@@ -5292,30 +5288,26 @@
   if (!VT.isVector())
     return SDValue();
 
-  // TODO: All the folds below are performed lane-by-lane and assume a fixed
-  // vector width, however we should be able to do constant folds involving
-  // splat vector nodes too.
-  if (VT.isScalableVector())
-    return SDValue();
-
-  // From this point onwards all vectors are assumed to be fixed width.
-  unsigned NumElts = VT.getVectorNumElements();
+  ElementCount NumElts = VT.getVectorElementCount();
 
-  auto IsScalarOrSameVectorSize = [&](const SDValue &Op) {
+  auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
     return !Op.getValueType().isVector() ||
-           Op.getValueType().getVectorNumElements() == NumElts;
+           Op.getValueType().getVectorElementCount() == NumElts;
   };
 
-  auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) {
+  auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
+    APInt SplatVal;
     BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
-    return (Op.isUndef()) || (Op.getOpcode() == ISD::CONDCODE) ||
-           (BV && BV->isConstant());
+    return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
+           (BV && BV->isConstant()) ||
+           (Op.getOpcode() == ISD::SPLAT_VECTOR &&
+            ISD::isConstantSplatVector(Op.getNode(), SplatVal));
   };
 
   // All operands must be vector types with the same number of elements as
   // the result type and must be either UNDEF or a build vector of constant
   // or UNDEF scalars.
-  if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
+  if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
       !llvm::all_of(Ops, IsScalarOrSameVectorSize))
     return SDValue();
 
@@ -5332,14 +5324,19 @@
     return SDValue();
   }
 
+  // For scalable vector types we know we're dealing with SPLAT_VECTORs. We
+  // only have one operand to check. For fixed-length vector types we may have
+  // a combination of BUILD_VECTOR and SPLAT_VECTOR.
+  unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+
   // Constant fold each scalar lane separately.
   SmallVector<SDValue, 4> ScalarResults;
-  for (unsigned i = 0; i != NumElts; i++) {
+  for (unsigned I = 0; I != NumOperands; I++) {
     SmallVector<SDValue, 4> ScalarOps;
     for (SDValue Op : Ops) {
       EVT InSVT = Op.getValueType().getScalarType();
-      BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op);
-      if (!InBV) {
+      if (Op.getOpcode() != ISD::BUILD_VECTOR &&
+          Op.getOpcode() != ISD::SPLAT_VECTOR) {
        // We've checked that this is UNDEF or a constant of some kind.
        if (Op.isUndef())
          ScalarOps.push_back(getUNDEF(InSVT));
@@ -5348,7 +5345,8 @@
        continue;
      }
 
-      SDValue ScalarOp = InBV->getOperand(i);
+      SDValue ScalarOp =
+          Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
       EVT ScalarVT = ScalarOp.getValueType();
 
       // Build vector (integer) scalar operands may need implicit
@@ -5373,7 +5371,8 @@
     ScalarResults.push_back(ScalarResult);
   }
 
-  SDValue V = getBuildVector(VT, DL, ScalarResults);
+  SDValue V = NumElts.isScalable() ? getSplatVector(VT, DL, ScalarResults[0])
+                                   : getBuildVector(VT, DL, ScalarResults);
   NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
   return V;
 }
diff --git a/llvm/test/CodeGen/AArch64/sve-expand-div.ll b/llvm/test/CodeGen/AArch64/sve-expand-div.ll
--- a/llvm/test/CodeGen/AArch64/sve-expand-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-expand-div.ll
@@ -73,11 +73,8 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z1.b, #-85 // =0xffffffffffffffab
 ; CHECK-NEXT:    ptrue p0.b
-; CHECK-NEXT:    mov z2.b, #1 // =0x1
-; CHECK-NEXT:    umulh z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT:    lsr z1.b, z1.b, #1
-; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, #3
-; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
+; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    lsr z0.b, z0.b, #1
 ; CHECK-NEXT:    ret
   %div = udiv <vscale x 16 x i8> %a, shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> undef, i8 3, i32 0), <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer)
   ret <vscale x 16 x i8> %div
@@ -87,13 +84,10 @@
 ; CHECK-LABEL: udiv_i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #-21845
+; CHECK-NEXT:    mov z1.h, w8
 ; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    mov z2.h, w8
-; CHECK-NEXT:    mov z1.h, #1 // =0x1
-; CHECK-NEXT:    umulh z2.h, p0/m, z2.h, z0.h
-; CHECK-NEXT:    lsr z2.h, z2.h, #1
-; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, #3
-; CHECK-NEXT:    sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    lsr z0.h, z0.h, #1
 ; CHECK-NEXT:    ret
   %div = udiv <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> undef, i16 3, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
   ret <vscale x 8 x i16> %div
@@ -104,13 +98,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #43691
 ; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    mov z1.s, w8
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    mov z2.s, w8
-; CHECK-NEXT:    mov z1.s, #3 // =0x3
-; CHECK-NEXT:    umulh z2.s, p0/m, z2.s, z0.s
-; CHECK-NEXT:    lsr z2.s, z2.s, #1
-; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, #1
-; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    lsr z0.s, z0.s, #1
 ; CHECK-NEXT:    ret
   %div = udiv <vscale x 4 x i32> %a, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> undef, i32 3, i32 0), <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer)
   ret <vscale x 4 x i32> %div
@@ -121,13 +112,10 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov x8, #-6148914691236517206
 ; CHECK-NEXT:    movk x8, #43691
+; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z2.d, x8
-; CHECK-NEXT:    mov z1.d, #3 // =0x3
-; CHECK-NEXT:    umulh z2.d, p0/m, z2.d, z0.d
-; CHECK-NEXT:    lsr z2.d, z2.d, #1
-; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, #1
-; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    lsr z0.d, z0.d, #1
 ; CHECK-NEXT:    ret
   %div = udiv <vscale x 2 x i64> %a, shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 3, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer)
   ret <vscale x 2 x i64> %div
diff --git a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
--- a/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
+++ b/llvm/test/CodeGen/Hexagon/isel-memory-vNi1.ll
@@ -33,15 +33,12 @@
 ; CHECK-NEXT:     r0 = memub(r0+#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r2 = #0
+; CHECK-NEXT:     r3:2 = combine(#0,#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     r5:4 = vsxtbh(r1)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r3:2 = vsxtbh(r2)
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
 ; CHECK-NEXT:     p0 = r0
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
@@ -142,16 +139,13 @@
 ; CHECK-LABEL: f5:
 ; CHECK:       // %bb.0: // %b0
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r2 = #0
-; CHECK-NEXT:    }
-; CHECK-NEXT:    {
-; CHECK-NEXT:     r5:4 = vsxtbh(r1)
+; CHECK-NEXT:     r3:2 = vsxtbh(r1)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     r3:2 = vsxtbh(r2)
+; CHECK-NEXT:     r5:4 = combine(#0,#0)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
-; CHECK-NEXT:     p0 = vcmph.eq(r5:4,r3:2)
+; CHECK-NEXT:     p0 = vcmph.eq(r3:2,r5:4)
 ; CHECK-NEXT:    }
 ; CHECK-NEXT:    {
 ; CHECK-NEXT:     r1 = mux(p0,#0,#1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefixes=CHECK,RV64
+
+; These tests check that the scalable-vector version of this series of
+; instructions does not get into an infinite DAGCombine loop. This was
+; originally exposing an infinite loop in which an 'and' of two truncates
+; was promoted to the larger value type, and the resulting 'truncate' was
+; then split back up into an 'and' of two truncates.
+; This didn't happen in the fixed-length test because a truncate of the
+; constant BUILD_VECTOR is folded into the BUILD_VECTOR itself. The
+; truncate of a constant SPLAT_VECTOR didn't follow suit.
+
+define <2 x i16> @fixedlen(<2 x i32> %x) {
+; RV32-LABEL: fixedlen:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
+; RV32-NEXT:    vsrl.vi v25, v8, 16
+; RV32-NEXT:    lui a0, 1048568
+; RV32-NEXT:    vand.vx v25, v25, a0
+; RV32-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
+; RV32-NEXT:    vnsrl.wi v8, v25, 0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: fixedlen:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 2, e32,mf2,ta,mu
+; RV64-NEXT:    vsrl.vi v25, v8, 16
+; RV64-NEXT:    lui a0, 32
+; RV64-NEXT:    addiw a0, a0, -1
+; RV64-NEXT:    slli a0, a0, 15
+; RV64-NEXT:    vand.vx v25, v25, a0
+; RV64-NEXT:    vsetvli zero, zero, e16,mf4,ta,mu
+; RV64-NEXT:    vnsrl.wi v8, v25, 0
+; RV64-NEXT:    ret
+  %v41 = insertelement <2 x i32> undef, i32 16, i32 0
+  %v42 = shufflevector <2 x i32> %v41, <2 x i32> undef, <2 x i32> zeroinitializer
+  %v43 = lshr <2 x i32> %x, %v42
+  %v44 = trunc <2 x i32> %v43 to <2 x i16>
+  %v45 = insertelement <2 x i32> undef, i32 -32768, i32 0
+  %v46 = shufflevector <2 x i32> %v45, <2 x i32> undef, <2 x i32> zeroinitializer
+  %v47 = trunc <2 x i32> %v46 to <2 x i16>
+  %v48 = and <2 x i16> %v44, %v47
+  ret <2 x i16> %v48
+}
+
+define <vscale x 2 x i16> @scalable(<vscale x 2 x i32> %x) {
+; CHECK-LABEL: scalable:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vsrl.vi v25, v8, 16
+; CHECK-NEXT:    vsetvli zero, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vnsrl.wi v25, v25, 0
+; CHECK-NEXT:    lui a0, 1048568
+; CHECK-NEXT:    vand.vx v8, v25, a0
+; CHECK-NEXT:    ret
+  %v41 = insertelement <vscale x 2 x i32> undef, i32 16, i32 0
+  %v42 = shufflevector <vscale x 2 x i32> %v41, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %v43 = lshr <vscale x 2 x i32> %x, %v42
+  %v44 = trunc <vscale x 2 x i32> %v43 to <vscale x 2 x i16>
+  %v45 = insertelement <vscale x 2 x i32> undef, i32 -32768, i32 0
+  %v46 = shufflevector <vscale x 2 x i32> %v45, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %v47 = trunc <vscale x 2 x i32> %v46 to <vscale x 2 x i16>
+  %v48 = and <vscale x 2 x i16> %v44, %v47
+  ret <vscale x 2 x i16> %v48
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
@@ -3077,8 +3077,7 @@
 ; CHECK-LABEL: icmp_eq_ii_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8,m1,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, 5
-; CHECK-NEXT:    vmseq.vi v0, v25, 2
+; CHECK-NEXT:    vmclr.m v0
 ; CHECK-NEXT:    ret
   %heada = insertelement <vscale x 8 x i8> undef, i8 5, i32 0
   %splata = shufflevector <vscale x 8 x i8> %heada, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv32.ll
@@ -29,10 +29,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -92,10 +89,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -131,10 +125,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -170,10 +161,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -209,10 +197,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 5
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v26, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -248,10 +233,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 5
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v28, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -286,11 +268,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vmulhu.vx v16, v8, a0
-; CHECK-NEXT:    vsrl.vi v16, v16, 5
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -327,10 +306,7 @@
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -367,10 +343,7 @@
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -407,10 +380,7 @@
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -447,10 +417,7 @@
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 13
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v26, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -487,10 +454,7 @@
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 13
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v28, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -526,11 +490,8 @@
 ; CHECK-NEXT:    lui a0, 2
 ; CHECK-NEXT:    addi a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vmulhu.vx v16, v8, a0
-; CHECK-NEXT:    vsrl.vi v16, v16, 13
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -563,14 +524,11 @@
 define <vscale x 1 x i32> @vdivu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv1i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 29
-; CHECK-NEXT:    vmseq.vi v0, v25, 1
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vmulhu.vx v25, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v25, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -603,14 +561,11 @@
 define <vscale x 2 x i32> @vdivu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv2i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 29
-; CHECK-NEXT:    vmseq.vi v0, v25, 1
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmulhu.vx v25, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v25, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -643,14 +598,11 @@
 define <vscale x 4 x i32> @vdivu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv4i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vmv.v.i v26, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 29
-; CHECK-NEXT:    vmseq.vi v0, v26, 1
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vmulhu.vx v26, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v26, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -683,14 +635,11 @@
 define <vscale x 8 x i32> @vdivu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv8i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vmv.v.i v28, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v12, v8, a0
-; CHECK-NEXT:    vsrl.vi v12, v12, 29
-; CHECK-NEXT:    vmseq.vi v0, v28, 1
-; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT:    vmulhu.vx v28, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v28, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -723,14 +672,11 @@
 define <vscale x 16 x i32> @vdivu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv16i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vmv.v.i v16, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v24, v8, a0
-; CHECK-NEXT:    vsrl.vi v24, v24, 29
-; CHECK-NEXT:    vmseq.vi v0, v16, 1
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -772,19 +718,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v25, (a0), zero
-; CHECK-NEXT:    vmv.v.i v26, -7
 ; CHECK-NEXT:    vmulhu.vv v25, v8, v25
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v25, v25, a0
-; CHECK-NEXT:    vmseq.vi v0, v26, 1
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v25, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
@@ -827,19 +770,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v26, (a0), zero
-; CHECK-NEXT:    vmv.v.i v28, -7
 ; CHECK-NEXT:    vmulhu.vv v26, v8, v26
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v26, v26, a0
-; CHECK-NEXT:    vmseq.vi v0, v28, 1
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v26, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
@@ -882,19 +822,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v28, (a0), zero
-; CHECK-NEXT:    vmv.v.i v12, -7
 ; CHECK-NEXT:    vmulhu.vv v28, v8, v28
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v28, v28, a0
-; CHECK-NEXT:    vmseq.vi v0, v12, 1
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v28, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
@@ -937,19 +874,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v16, (a0), zero
-; CHECK-NEXT:    vmv.v.i v24, -7
-; CHECK-NEXT:    vmulhu.vv v16, v8, v16
+; CHECK-NEXT:    vmulhu.vv v8, v8, v16
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v16, v16, a0
-; CHECK-NEXT:    vmseq.vi v0, v24, 1
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v8, a0
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode-rv64.ll
@@ -29,10 +29,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
@@ -68,10 +65,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> undef, <vscale x 2 x i32> zeroinitializer
@@ -107,10 +101,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> undef, <vscale x 4 x i32> zeroinitializer
@@ -146,10 +137,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> undef, <vscale x 8 x i32> zeroinitializer
@@ -185,10 +173,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 5
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v26, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 16 x i8> %head, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
@@ -224,10 +209,7 @@
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 5
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v28, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 32 x i8> %head, <vscale x 32 x i8> undef, <vscale x 32 x i32> zeroinitializer
@@ -262,11 +244,8 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a0, zero, 33
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
-; CHECK-NEXT:    vmulhu.vx v16, v8, a0
-; CHECK-NEXT:    vsrl.vi v16, v16, 5
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 5
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 64 x i8> undef, i8 -7, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> undef, <vscale x 64 x i32> zeroinitializer
@@ -303,10 +282,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
@@ -343,10 +319,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> undef, <vscale x 2 x i32> zeroinitializer
@@ -383,10 +356,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> undef, <vscale x 4 x i32> zeroinitializer
@@ -423,10 +393,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 13
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v26, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
@@ -463,10 +430,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 13
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v28, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 16 x i16> %head, <vscale x 16 x i16> undef, <vscale x 16 x i32> zeroinitializer
@@ -502,11 +466,8 @@
 ; CHECK-NEXT:    lui a0, 2
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
-; CHECK-NEXT:    vmulhu.vx v16, v8, a0
-; CHECK-NEXT:    vsrl.vi v16, v16, 13
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 13
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 32 x i16> undef, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
@@ -543,10 +504,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 29
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer
@@ -583,10 +541,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
-; CHECK-NEXT:    vsrl.vi v25, v25, 29
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v8, v25, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v25, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 2 x i32> %head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -623,10 +578,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 29
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v26, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 4 x i32> %head, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
@@ -663,10 +615,7 @@
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 29
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vi v8, v28, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 8 x i32> %head, <vscale x 8 x i32> undef, <vscale x 8 x i32> zeroinitializer
@@ -702,11 +651,8 @@
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addiw a0, a0, 1
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vmulhu.vx v16, v8, a0
-; CHECK-NEXT:    vsrl.vi v16, v16, 29
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
+; CHECK-NEXT:    vsrl.vi v8, v8, 29
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 16 x i32> undef, i32 -7, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
@@ -739,16 +685,13 @@
 define <vscale x 1 x i64> @vdivu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv1i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v26, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v26, v26, a0
-; CHECK-NEXT:    vmseq.vi v0, v25, 1
-; CHECK-NEXT:    vmerge.vvm v8, v26, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v25, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
@@ -781,16 +724,13 @@
 define <vscale x 2 x i64> @vdivu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv2i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.i v26, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v28, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v28, v28, a0
-; CHECK-NEXT:    vmseq.vi v0, v26, 1
-; CHECK-NEXT:    vmerge.vvm v8, v28, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v26, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
@@ -823,16 +763,13 @@
 define <vscale x 4 x i64> @vdivu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv4i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.i v28, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v12, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v12, v12, a0
-; CHECK-NEXT:    vmseq.vi v0, v28, 1
-; CHECK-NEXT:    vmerge.vvm v28, v12, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v28, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
@@ -865,16 +802,13 @@
 define <vscale x 8 x i64> @vdivu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vdivu_vi_nxv8i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.i v16, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v24, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vmulhu.vx v8, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v24, v24, a0
-; CHECK-NEXT:    vmseq.vi v0, v16, 1
-; CHECK-NEXT:    vmerge.vvm v8, v24, v8, v0
+; CHECK-NEXT:    vsrl.vx v8, v8, a0
 ; CHECK-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv32.ll
@@ -30,9 +30,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -72,9 +69,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -114,9 +108,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -156,9 +147,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -198,9 +186,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    vsrl.vi v26, v26, 5
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v26, v26, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -240,9 +225,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    vsrl.vi v28, v28, 5
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v28, v28, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -282,9 +264,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v16, v8, a0
 ; CHECK-NEXT:    vsrl.vi v16, v16, 5
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
@@ -325,9 +304,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -368,9 +344,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -411,9 +384,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -454,9 +424,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    vsrl.vi v26, v26, 13
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v26, v26, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -497,9 +464,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    vsrl.vi v28, v28, 13
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v28, v28, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -540,9 +504,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v16, v8, a0
 ; CHECK-NEXT:    vsrl.vi v16, v16, 13
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
@@ -578,14 +539,11 @@
 define <vscale x 1 x i32> @vremu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vremu_vi_nxv1i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 29
-; CHECK-NEXT:    vmseq.vi v0, v25, 1
-; CHECK-NEXT:    vmerge.vvm v25, v26, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vmulhu.vx v25, v8, a0
+; CHECK-NEXT:    vsrl.vi v25, v25, 29
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -621,14 +579,11 @@
 define <vscale x 2 x i32> @vremu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vremu_vi_nxv2i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v26, v8, a0
-; CHECK-NEXT:    vsrl.vi v26, v26, 29
-; CHECK-NEXT:    vmseq.vi v0, v25, 1
-; CHECK-NEXT:    vmerge.vvm v25, v26, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vmulhu.vx v25, v8, a0
+; CHECK-NEXT:    vsrl.vi v25, v25, 29
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -664,14 +619,11 @@
 define <vscale x 4 x i32> @vremu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vremu_vi_nxv4i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
-; CHECK-NEXT:    vmv.v.i v26, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v28, v8, a0
-; CHECK-NEXT:    vsrl.vi v28, v28, 29
-; CHECK-NEXT:    vmseq.vi v0, v26, 1
-; CHECK-NEXT:    vmerge.vvm v26, v28, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vmulhu.vx v26, v8, a0
+; CHECK-NEXT:    vsrl.vi v26, v26, 29
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -707,14 +659,11 @@
 define <vscale x 8 x i32> @vremu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vremu_vi_nxv8i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
-; CHECK-NEXT:    vmv.v.i v28, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v12, v8, a0
-; CHECK-NEXT:    vsrl.vi v12, v12, 29
-; CHECK-NEXT:    vmseq.vi v0, v28, 1
-; CHECK-NEXT:    vmerge.vvm v28, v12, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
+; CHECK-NEXT:    vmulhu.vx v28, v8, a0
+; CHECK-NEXT:    vsrl.vi v28, v28, 29
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -750,14 +699,11 @@
 define <vscale x 16 x i32> @vremu_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
 ; CHECK-LABEL: vremu_vi_nxv16i32_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
-; CHECK-NEXT:    vmv.v.i v16, -7
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v24, v8, a0
-; CHECK-NEXT:    vsrl.vi v24, v24, 29
-; CHECK-NEXT:    vmseq.vi v0, v16, 1
-; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
+; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vmulhu.vx v16, v8, a0
+; CHECK-NEXT:    vsrl.vi v16, v16, 29
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
@@ -802,19 +748,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v25, (a0), zero
-; CHECK-NEXT:    vmv.v.i v26, -7
 ; CHECK-NEXT:    vmulhu.vv v25, v8, v25
 ; CHECK-NEXT:    addi a0, zero, 61
 ; CHECK-NEXT:    vsrl.vx v25, v25, a0
-; CHECK-NEXT:    vmseq.vi v0, v26, 1
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -860,19 +803,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v26, (a0), zero
-; CHECK-NEXT:    vmv.v.i v28, -7
 ; CHECK-NEXT:    vmulhu.vv v26, v8, v26
 ; CHECK-NEXT:    addi a0, zero, 61
 ; CHECK-NEXT:    vsrl.vx v26, v26, a0
-; CHECK-NEXT:    vmseq.vi v0, v28, 1
-; CHECK-NEXT:    vmerge.vvm v26, v26, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -918,19 +858,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v28, (a0), zero
-; CHECK-NEXT:    vmv.v.i v12, -7
 ; CHECK-NEXT:    vmulhu.vv v28, v8, v28
 ; CHECK-NEXT:    addi a0, zero, 61
 ; CHECK-NEXT:    vsrl.vx v28, v28, a0
-; CHECK-NEXT:    vmseq.vi v0, v12, 1
-; CHECK-NEXT:    vmerge.vvm v28, v28, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -976,19 +913,16 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    lui a0, 131072
 ; CHECK-NEXT:    sw a0, 12(sp)
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    sw a0, 8(sp)
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
 ; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vlse64.v v16, (a0), zero
-; CHECK-NEXT:    vmv.v.i v24, -7
 ; CHECK-NEXT:    vmulhu.vv v16, v8, v16
 ; CHECK-NEXT:    addi a0, zero, 61
 ; CHECK-NEXT:    vsrl.vx v16, v16, a0
-; CHECK-NEXT:    vmseq.vi v0, v24, 1
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode-rv64.ll
@@ -30,9 +30,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -72,9 +69,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -114,9 +108,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -156,9 +147,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 5
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -198,9 +186,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    vsrl.vi v26, v26, 5
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v26, v26, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -240,9 +225,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    vsrl.vi v28, v28, 5
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v28, v28, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -282,9 +264,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e8,m8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v16, v8, a0
 ; CHECK-NEXT:    vsrl.vi v16, v16, 5
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
@@ -325,9 +304,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -368,9 +344,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -411,9 +384,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 13
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -454,9 +424,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    vsrl.vi v26, v26, 13
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v26, v26, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -497,9 +464,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    vsrl.vi v28, v28, 13
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v28, v28, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -540,9 +504,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e16,m8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v16, v8, a0
 ; CHECK-NEXT:    vsrl.vi v16, v16, 13
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
@@ -583,9 +544,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e32,mf2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 29
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -626,9 +584,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m1,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    vsrl.vi v25, v25, 29
-; CHECK-NEXT:    vmv.v.i v26, 1
-; CHECK-NEXT:    vmseq.vi v0, v26, -7
-; CHECK-NEXT:    vmerge.vvm v25, v25, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -669,9 +624,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m2,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    vsrl.vi v26, v26, 29
-; CHECK-NEXT:    vmv.v.i v28, 1
-; CHECK-NEXT:    vmseq.vi v0, v28, -7
-; CHECK-NEXT:    vmerge.vvm v26, v26, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -712,9 +664,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m4,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    vsrl.vi v28, v28, 29
-; CHECK-NEXT:    vmv.v.i v12, 1
-; CHECK-NEXT:    vmseq.vi v0, v12, -7
-; CHECK-NEXT:    vmerge.vvm v28, v28, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -755,9 +704,6 @@
 ; CHECK-NEXT:    vsetvli a1, zero, e32,m8,ta,mu
 ; CHECK-NEXT:    vmulhu.vx v16, v8, a0
 ; CHECK-NEXT:    vsrl.vi v16, v16, 29
-; CHECK-NEXT:    vmv.v.i v24, 1
-; CHECK-NEXT:    vmseq.vi v0, v24, -7
-; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
@@ -793,16 +739,13 @@
 define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vremu_vi_nxv1i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
-; CHECK-NEXT:    vmv.v.i v25, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v26, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vmulhu.vx v25, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v26, v26, a0
-; CHECK-NEXT:    vmseq.vi v0, v25, 1
-; CHECK-NEXT:    vmerge.vvm v25, v26, v8, v0
+; CHECK-NEXT:    vsrl.vx v25, v25, a0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v25, v25, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v25
@@ -838,16 +781,13 @@
 define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vremu_vi_nxv2i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
-; CHECK-NEXT:    vmv.v.i v26, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v28, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vmulhu.vx v26, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v28, v28, a0
-; CHECK-NEXT:    vmseq.vi v0, v26, 1
-; CHECK-NEXT:    vmerge.vvm v26, v28, v8, v0
+; CHECK-NEXT:    vsrl.vx v26, v26, a0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v26, v26, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v26
@@ -883,16 +823,13 @@
 define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vremu_vi_nxv4i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
-; CHECK-NEXT:    vmv.v.i v28, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v12, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vmulhu.vx v28, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v12, v12, a0
-; CHECK-NEXT:    vmseq.vi v0, v28, 1
-; CHECK-NEXT:    vmerge.vvm v28, v12, v8, v0
+; CHECK-NEXT:    vsrl.vx v28, v28, a0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v28, v28, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v28
@@ -928,16 +865,13 @@
 define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vremu_vi_nxv8i64_0:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
-; CHECK-NEXT:    vmv.v.i v16, -7
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    slli a0, a0, 61
 ; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    vmulhu.vx v24, v8, a0
+; CHECK-NEXT:    vsetvli a1, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vmulhu.vx v16, v8, a0
 ; CHECK-NEXT:    addi a0, zero, 61
-; CHECK-NEXT:    vsrl.vx v24, v24, a0
-; CHECK-NEXT:    vmseq.vi v0, v16, 1
-; CHECK-NEXT:    vmerge.vvm v16, v24, v8, v0
+; CHECK-NEXT:    vsrl.vx v16, v16, a0
 ; CHECK-NEXT:    addi a0, zero, -7
 ; CHECK-NEXT:    vmul.vx v16, v16, a0
 ; CHECK-NEXT:    vsub.vv v8, v8, v16
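
Illustrative note (not part of the patch; node names and dump syntax are schematic rather than copied from an actual -debug run): the kind of fold the FoldConstantVectorArithmetic change enables can be sketched in SelectionDAG notation. Before the change, a unary operation on a constant splat was left intact for scalable types:

  t1: nxv2i32 = splat_vector Constant:i32<-32768>
  t2: nxv2i16 = truncate t1

With SPLAT_VECTOR handled, the truncate is folded lane-wise on the single splat operand and rebuilt with getSplatVector, mirroring what already happened for constant BUILD_VECTORs:

  t2: nxv2i16 = splat_vector Constant:i16<-32768>

The same mechanism is why the vmv.v.i/vmseq.vi/vmerge.vvm (and cmpeq/sel on SVE) sequences disappear from the division tests above: the setcc comparing two constant splats that BuildUDIV emits is now folded at compile time, so the guarding select of the dividend drops out entirely.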