Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9720,6 +9720,26 @@
       return Gather;
     break;
   }
+  case RISCVISD::VMV_S_X_VL: {
+    // TODO: Generalize this for VMV_S_F_VL as well.
+    SDLoc DL(N);
+    MVT VT = N->getSimpleValueType(0);
+    SDValue SrcVec = N->getOperand(0);
+    SDValue VL = N->getOperand(2);
+    // If we have an insert into a splat, we can use a slide1up instead as
+    // sliding the splat doesn't change any of the lanes, and this lets us
+    // avoid a tail undisturbed instruction (and thus a likely vsetvli
+    // toggle). TODO: Can relax type check to allow any smaller element
+    // type which repeats at the larger type, even with float vs integer
+    // mismatch. Can also allow a source vector with a larger VL.
+    if (SrcVec.getOpcode() == RISCVISD::VMV_V_X_VL &&
+        SrcVec.getSimpleValueType() == VT && SrcVec.getOperand(2) == VL) {
+      return DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, VT, DAG.getUNDEF(VT),
+                         SrcVec, N->getOperand(1),
+                         getAllOnesMask(VT, VL, DL, DAG), VL);
+    }
+    break;
+  }
   case RISCVISD::VMV_V_X_VL: {
     // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
     // scalar input.
Index: llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
===================================================================
--- llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
+++ llvm/test/CodeGen/RISCV/fold-vector-cmp.ll
@@ -15,11 +15,9 @@
 ; CHECK-V-NEXT: lui a1, 524288
 ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; CHECK-V-NEXT: vmv.v.x v8, a1
-; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
-; CHECK-V-NEXT: vmv.s.x v8, a0
+; CHECK-V-NEXT: vslide1up.vx v9, v8, a0
 ; CHECK-V-NEXT: addiw a0, a1, 2
-; CHECK-V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT: vmslt.vx v0, v8, a0
+; CHECK-V-NEXT: vmslt.vx v0, v9, a0
 ; CHECK-V-NEXT: vmv.v.i v8, 0
 ; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -22,10 +22,8 @@
 ; CHECK-NEXT: vmv.x.s a1, v8
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
+; CHECK-NEXT: vand.vi v8, v9, 1
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
   %res = call <2 x i1> @llvm.experimental.vector.reverse.v2i1(<2 x i1> %a)
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -524,9 +524,8 @@
 ; RV32ELEN32-LABEL: bitcast_i64_v4i16:
 ; RV32ELEN32: # %bb.0:
 ; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT: vmv.v.x v8, a1
-; RV32ELEN32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32ELEN32-NEXT: vmv.s.x v8, a0
+; RV32ELEN32-NEXT: vmv.v.x v9, a1
+; RV32ELEN32-NEXT: vslide1up.vx v8, v9, a0
 ; RV32ELEN32-NEXT: ret
 ;
 ; RV64ELEN32-LABEL: bitcast_i64_v4i16:
@@ -563,9 +562,8 @@
 ; RV32ELEN32-LABEL: bitcast_i64_v2i32:
 ; RV32ELEN32: # %bb.0:
 ; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT: vmv.v.x v8, a1
-; RV32ELEN32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32ELEN32-NEXT: vmv.s.x v8, a0
+; RV32ELEN32-NEXT: vmv.v.x v9, a1
+; RV32ELEN32-NEXT: vslide1up.vx v8, v9, a0
 ; RV32ELEN32-NEXT: ret
 ;
 ; RV64ELEN32-LABEL: bitcast_i64_v2i32:
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -216,9 +216,9 @@
 ; RV32-NEXT: fmin.d ft0, ft0, ft1
 ; RV32-NEXT: fcvt.w.d a0, ft0, rtz
 ; RV32-NEXT: .LBB10_4:
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, tu, ma
-; RV32-NEXT: vmv.s.x v9, a0
-; RV32-NEXT: vse8.v v9, (a1)
+; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT: vslide1up.vx v8, v9, a0
+; RV32-NEXT: vse8.v v8, (a1)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: fp2si_v2f64_v2i8:
@@ -250,9 +250,9 @@
 ; RV64-NEXT: fmin.d ft0, ft0, ft1
 ; RV64-NEXT: fcvt.l.d a0, ft0, rtz
 ; RV64-NEXT: .LBB10_4:
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, tu, ma
-; RV64-NEXT: vmv.s.x v9, a0
-; RV64-NEXT: vse8.v v9, (a1)
+; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT: vslide1up.vx v8, v9, a0
+; RV64-NEXT: vse8.v v8, (a1)
 ; RV64-NEXT: ret
   %a = load <2 x double>, <2 x double>* %x
   %d = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> %a)
@@ -281,9 +281,8 @@
 ; RV32-NEXT: fcvt.wu.d a2, ft0, rtz
 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; RV32-NEXT: vmv.v.x v8, a2
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
-; RV32-NEXT: vmv.s.x v8, a0
-; RV32-NEXT: vse8.v v8, (a1)
+; RV32-NEXT: vslide1up.vx v9, v8, a0
+; RV32-NEXT: vse8.v v9, (a1)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: fp2ui_v2f64_v2i8:
@@ -305,9 +304,8 @@
 ; RV64-NEXT: fcvt.lu.d a2, ft0, rtz
 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; RV64-NEXT: vmv.v.x v8, a2
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
-; RV64-NEXT: vmv.s.x v8, a0
-; RV64-NEXT: vse8.v v8, (a1)
+; RV64-NEXT: vslide1up.vx v9, v8, a0
+; RV64-NEXT: vse8.v v9, (a1)
 ; RV64-NEXT: ret
   %a = load <2 x double>, <2 x double>* %x
   %d = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f64(<2 x double> %a)
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -47,10 +47,9 @@
 ; RV32-NEXT: addi a4, a0, 20
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT: vlse32.v v10, (a4), zero
-; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT: vmv.s.x v10, a3
+; RV32-NEXT: vslide1up.vx v12, v10, a3
 ; RV32-NEXT: vsetvli zero, zero, e64, m2, tu, ma
-; RV32-NEXT: vslideup.vi v8, v10, 2
+; RV32-NEXT: vslideup.vi v8, v12, 2
 ; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
 ; RV32-NEXT: vmv.v.i v10, 0
 ; RV32-NEXT: vslide1up.vx v12, v10, a2
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -379,9 +379,8 @@
 ; RV64-NEXT: ld a1, %lo(.LCPI20_0)(a1)
 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT: vmv.v.i v8, -1
-; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vslide1up.vx v9, v8, a1
+; RV64-NEXT: vse64.v v9, (a0)
 ; RV64-NEXT: ret
   store <2 x i64> , <2 x i64>* %x
   ret void
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -324,11 +324,9 @@
 ; CHECK-LABEL: splat_ve4_ins_i0ve2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 4
+; CHECK-NEXT: vmv.v.i v9, 4
 ; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
-; CHECK-NEXT: vmv.s.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1up.vx v10, v9, a0
 ; CHECK-NEXT: vrgather.vv v9, v8, v10
 ; CHECK-NEXT: vmv1r.v v8, v9
 ; CHECK-NEXT: ret
@@ -371,12 +369,10 @@
 define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) {
 ; CHECK-LABEL: splat_ve2_we0_ins_i0ve4:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v11, 2
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.v.i v10, 2
 ; CHECK-NEXT: li a0, 4
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
-; CHECK-NEXT: vmv.s.x v11, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
+; CHECK-NEXT: vslide1up.vx v11, v10, a0
 ; CHECK-NEXT: li a0, 66
 ; CHECK-NEXT: vmv.s.x v0, a0
 ; CHECK-NEXT: vrgather.vv v10, v8, v11
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -954,9 +954,7 @@
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT: vmv.v.i v11, 0
 ; CHECK-NEXT: lui a1, 1048568
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma
-; CHECK-NEXT: vmv.v.i v12, 0
-; CHECK-NEXT: vmv.s.x v12, a1
+; CHECK-NEXT: vslide1up.vx v12, v11, a1
 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma
 ; CHECK-NEXT: vslideup.vi v11, v9, 6
 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -1039,10 +1037,8 @@
 ; RV64-NEXT: lui a1, %hi(.LCPI55_1)
 ; RV64-NEXT: ld a1, %lo(.LCPI55_1)(a1)
 ; RV64-NEXT: vle64.v v9, (a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmulhu.vv v8, v9, v8
+; RV64-NEXT: vslide1up.vx v10, v8, a1
+; RV64-NEXT: vmulhu.vv v8, v9, v10
 ; RV64-NEXT: vid.v v9
 ; RV64-NEXT: vadd.vi v9, v9, 1
 ; RV64-NEXT: vsrl.vv v8, v8, v9
@@ -1195,10 +1191,9 @@
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT: vmv.v.x v9, a2
 ; RV32-NEXT: addi a1, a1, 1366
-; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT: vmv.s.x v9, a1
+; RV32-NEXT: vslide1up.vx v10, v9, a1
 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT: vmulh.vv v9, v8, v9
+; RV32-NEXT: vmulh.vv v9, v8, v10
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT: vid.v v10
 ; RV32-NEXT: vsrl.vi v10, v10, 1
@@ -1228,10 +1223,8 @@
 ; RV64-NEXT: lui a1, %hi(.LCPI59_1)
 ; RV64-NEXT: ld a1, %lo(.LCPI59_1)(a1)
 ; RV64-NEXT: vle64.v v9, (a0)
-; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; RV64-NEXT: vmulh.vv v8, v9, v8
+; RV64-NEXT: vslide1up.vx v10, v8, a1
+; RV64-NEXT: vmulh.vv v8, v9, v10
 ; RV64-NEXT: vid.v v10
 ; RV64-NEXT: vrsub.vi v11, v10, 0
 ; RV64-NEXT: vmadd.vv v11, v9, v8
@@ -4721,43 +4714,37 @@
 ; LMULMAX1-RV64-LABEL: mulhu_v4i64:
 ; LMULMAX1-RV64: # %bb.0:
 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
 ; LMULMAX1-RV64-NEXT: addi a1, a0, 16
-; LMULMAX1-RV64-NEXT: vle64.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0
+; LMULMAX1-RV64-NEXT: vle64.v v8, (a1)
+; LMULMAX1-RV64-NEXT: vmv.v.i v9, 0
 ; LMULMAX1-RV64-NEXT: li a2, -1
+; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI156_0)
+; LMULMAX1-RV64-NEXT: addi a3, a3, %lo(.LCPI156_0)
+; LMULMAX1-RV64-NEXT: vlse64.v v10, (a3), zero
+; LMULMAX1-RV64-NEXT: lui a3, %hi(.LCPI156_1)
+; LMULMAX1-RV64-NEXT: ld a3, %lo(.LCPI156_1)(a3)
 ; LMULMAX1-RV64-NEXT: slli a2, a2, 63
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_0)
-; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI156_0)
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero
-; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_1)
-; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI156_1)(a2)
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11
-; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11
-; LMULMAX1-RV64-NEXT: vid.v v10
+; LMULMAX1-RV64-NEXT: vslide1up.vx v11, v9, a2
+; LMULMAX1-RV64-NEXT: vle64.v v9, (a0)
+; LMULMAX1-RV64-NEXT: vslide1up.vx v12, v10, a3
+; LMULMAX1-RV64-NEXT: vmulhu.vv v10, v8, v12
+; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v10
+; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11
+; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_2)
 ; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI156_2)
-; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero
+; LMULMAX1-RV64-NEXT: vlse64.v v10, (a2), zero
 ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_3)
 ; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI156_3)(a2)
-; LMULMAX1-RV64-NEXT: vadd.vi v12, v10, 2
-; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v12
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11
-; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1
-; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vse64.v v9, (a1)
+; LMULMAX1-RV64-NEXT: vid.v v11
+; LMULMAX1-RV64-NEXT: vadd.vi v12, v11, 2
+; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v12
+; LMULMAX1-RV64-NEXT: vslide1up.vx v12, v10, a2
+; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v12
+; LMULMAX1-RV64-NEXT: vadd.vi v10, v11, 1
+; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v10
+; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
 ; LMULMAX1-RV64-NEXT: ret
   %a = load <4 x i64>, <4 x i64>* %x
   %b = udiv <4 x i64> %a, 
@@ -5078,32 +5065,30 @@
 ; LMULMAX1-RV64-LABEL: mulhs_v4i64:
 ; LMULMAX1-RV64: # %bb.0:
 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI160_0)
-; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI160_0)
-; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI160_1)
-; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI160_1)(a1)
-; LMULMAX1-RV64-NEXT: addi a2, a0, 16
-; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma
-; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vmulh.vv v11, v10, v9
+; LMULMAX1-RV64-NEXT: addi a1, a0, 16
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI160_0)
+; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI160_0)
+; LMULMAX1-RV64-NEXT: vlse64.v v8, (a2), zero
+; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI160_1)
+; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI160_1)(a2)
+; LMULMAX1-RV64-NEXT: vle64.v v9, (a1)
+; LMULMAX1-RV64-NEXT: vle64.v v10, (a0)
+; LMULMAX1-RV64-NEXT: vslide1up.vx v11, v8, a2
+; LMULMAX1-RV64-NEXT: vmulh.vv v8, v9, v11
 ; LMULMAX1-RV64-NEXT: vid.v v12
 ; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0
-; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v10
-; LMULMAX1-RV64-NEXT: li a1, 63
-; LMULMAX1-RV64-NEXT: vsrl.vx v10, v11, a1
-; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12
-; LMULMAX1-RV64-NEXT: vadd.vv v10, v11, v10
-; LMULMAX1-RV64-NEXT: vmulh.vv v9, v8, v9
-; LMULMAX1-RV64-NEXT: vmacc.vv v9, v8, v13
-; LMULMAX1-RV64-NEXT: vsrl.vx v8, v9, a1
+; LMULMAX1-RV64-NEXT: vmacc.vv v8, v13, v9
+; LMULMAX1-RV64-NEXT: li a2, 63
+; LMULMAX1-RV64-NEXT: vsrl.vx v9, v8, a2
+; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v12
+; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v9
+; LMULMAX1-RV64-NEXT: vmulh.vv v9, v10, v11
+; LMULMAX1-RV64-NEXT: vmacc.vv v9, v10, v13
+; LMULMAX1-RV64-NEXT: vsrl.vx v10, v9, a2
 ; LMULMAX1-RV64-NEXT: vsra.vv v9, v9, v12
-; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vse64.v v10, (a2)
+; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v10
+; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
+; LMULMAX1-RV64-NEXT: vse64.v v8, (a1)
 ; LMULMAX1-RV64-NEXT: ret
   %a = load <4 x i64>, <4 x i64>* %x
   %b = sdiv <4 x i64> %a, 
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -55,10 +55,8 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT: vmv.v.x v8, a1
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
-; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vand.vi v8, v8, 1
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
+; CHECK-NEXT: vand.vi v8, v9, 1
 ; CHECK-NEXT: vmsne.vi v0, v8, 0
 ; CHECK-NEXT: ret
 ;
@@ -66,10 +64,8 @@
 ; ZVE32F: # %bb.0:
 ; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; ZVE32F-NEXT: vmv.v.x v8, a1
-; ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma
-; ZVE32F-NEXT: vmv.s.x v8, a0
-; ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
-; ZVE32F-NEXT: vand.vi v8, v8, 1
+; ZVE32F-NEXT: vslide1up.vx v9, v8, a0
+; ZVE32F-NEXT: vand.vi v8, v9, 1
 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0
 ; ZVE32F-NEXT: ret
   %1 = insertelement <2 x i1> poison, i1 %x, i32 0
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1718,17 +1718,15 @@
 ; RV32ZVE32F-NEXT: addi a0, a0, 8
 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; RV32ZVE32F-NEXT: vlse32.v v9, (a0), zero
-; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32ZVE32F-NEXT: vmv.s.x v9, a1
-; RV32ZVE32F-NEXT: vsoxei32.v v9, (zero), v8, v0.t
+; RV32ZVE32F-NEXT: vslide1up.vx v10, v9, a1
+; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t
 ; RV32ZVE32F-NEXT: ret
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64ZVE32F: # %bb.0:
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.v.x v8, a1
-; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV64ZVE32F-NEXT: vmv.s.x v8, a0
+; RV64ZVE32F-NEXT: vmv.v.x v9, a1
+; RV64ZVE32F-NEXT: vslide1up.vx v8, v9, a0
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT: andi a1, a0, 1
Index: llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
===================================================================
--- llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -539,24 +539,19 @@
 ; RV32MV-NEXT: vle16.v v8, (a1)
 ; RV32MV-NEXT: vmv.v.i v9, 10
 ; RV32MV-NEXT: li a1, 9
-; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
-; RV32MV-NEXT: vmv.s.x v9, a1
-; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV32MV-NEXT: lui a1, %hi(.LCPI4_0)
-; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT: vle16.v v10, (a1)
-; RV32MV-NEXT: vid.v v11
-; RV32MV-NEXT: vsub.vv v8, v8, v11
+; RV32MV-NEXT: lui a2, %hi(.LCPI4_0)
+; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_0)
+; RV32MV-NEXT: vle16.v v10, (a2)
+; RV32MV-NEXT: vslide1up.vx v11, v9, a1
+; RV32MV-NEXT: vid.v v9
+; RV32MV-NEXT: vsub.vv v8, v8, v9
 ; RV32MV-NEXT: vmul.vv v8, v8, v10
-; RV32MV-NEXT: vadd.vv v10, v8, v8
-; RV32MV-NEXT: vsll.vv v9, v10, v9
+; RV32MV-NEXT: vadd.vv v9, v8, v8
+; RV32MV-NEXT: vsll.vv v9, v9, v11
 ; RV32MV-NEXT: vmv.v.i v10, 0
 ; RV32MV-NEXT: li a1, 1
-; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
-; RV32MV-NEXT: vmv1r.v v11, v10
-; RV32MV-NEXT: vmv.s.x v11, a1
+; RV32MV-NEXT: vslide1up.vx v11, v10, a1
 ; RV32MV-NEXT: li a1, 2047
-; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32MV-NEXT: vand.vx v8, v8, a1
 ; RV32MV-NEXT: lui a2, %hi(.LCPI4_1)
 ; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
@@ -604,24 +599,19 @@
 ; RV64MV-NEXT: vle16.v v8, (a1)
 ; RV64MV-NEXT: vmv.v.i v9, 10
 ; RV64MV-NEXT: li a1, 9
-; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
-; RV64MV-NEXT: vmv.s.x v9, a1
-; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; RV64MV-NEXT: lui a1, %hi(.LCPI4_0)
-; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT: vle16.v v10, (a1)
-; RV64MV-NEXT: vid.v v11
-; RV64MV-NEXT: vsub.vv v8, v8, v11
+; RV64MV-NEXT: lui a2, %hi(.LCPI4_0)
+; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_0)
+; RV64MV-NEXT: vle16.v v10, (a2)
+; RV64MV-NEXT: vslide1up.vx v11, v9, a1
+; RV64MV-NEXT: vid.v v9
+; RV64MV-NEXT: vsub.vv v8, v8, v9
 ; RV64MV-NEXT: vmul.vv v8, v8, v10
-; RV64MV-NEXT: vadd.vv v10, v8, v8
-; RV64MV-NEXT: vsll.vv v9, v10, v9
+; RV64MV-NEXT: vadd.vv v9, v8, v8
+; RV64MV-NEXT: vsll.vv v9, v9, v11
 ; RV64MV-NEXT: vmv.v.i v10, 0
 ; RV64MV-NEXT: li a1, 1
-; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
-; RV64MV-NEXT: vmv1r.v v11, v10
-; RV64MV-NEXT: vmv.s.x v11, a1
+; RV64MV-NEXT: vslide1up.vx v11, v10, a1
 ; RV64MV-NEXT: li a1, 2047
-; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64MV-NEXT: vand.vx v8, v8, a1
 ; RV64MV-NEXT: lui a2, %hi(.LCPI4_1)
 ; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_1)
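For reference, the before/after shape of the rewrite, distilled from the fold-vector-cmp.ll hunk above (register numbers and the VL of 2 are illustrative, not taken from any one test):

; Before: inserting the scalar into lane 0 of the splat needs a
; tail-undisturbed vmv.s.x, which forces an extra vsetvli toggle.
;   vsetivli zero, 2, e32, mf2, ta, ma
;   vmv.v.x v8, a1
;   vsetvli zero, zero, e32, mf2, tu, ma
;   vmv.s.x v8, a0
;
; After: sliding a splat up by one leaves every lane equal to a1, so
; vslide1up.vx keeps the splat value in the upper lanes and pushes a0
; into lane 0 under the existing tail-agnostic vtype state.
;   vsetivli zero, 2, e32, mf2, ta, ma
;   vmv.v.x v8, a1
;   vslide1up.vx v9, v8, a0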