diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3157,6 +3157,7 @@
   // Don't perform this optimization when optimizing for size, since
   // materializing elements and inserting them tends to cause code bloat.
   if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
+      (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
       ((MostCommonCount > DominantValueCountThreshold) ||
        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
     // Start by splatting the most common element.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll
@@ -18,12 +18,10 @@
 ; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 1
 ; CHECK-NEXT:    vmv.x.s a0, v8
-; CHECK-NEXT:    vfirst.m a1, v0
-; CHECK-NEXT:    seqz a1, a1
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -521,9 +521,8 @@
 ; RV32ELEN32-LABEL: bitcast_i64_v4i16:
 ; RV32ELEN32:       # %bb.0:
 ; RV32ELEN32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT:    vmv.v.x v8, a1
-; RV32ELEN32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV32ELEN32-NEXT:    vmv.s.x v8, a0
+; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32ELEN32-NEXT:    ret
 ;
 ; RV64ELEN32-LABEL: bitcast_i64_v4i16:
@@ -557,9 +556,8 @@
 ; RV32ELEN32-LABEL: bitcast_i64_v2i32:
 ; RV32ELEN32:       # %bb.0:
 ; RV32ELEN32-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT:    vmv.v.x v8, a1
-; RV32ELEN32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV32ELEN32-NEXT:    vmv.s.x v8, a0
+; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT:    vslide1down.vx v8, v8, a1
 ; RV32ELEN32-NEXT:    ret
 ;
 ; RV64ELEN32-LABEL: bitcast_i64_v2i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -191,7 +191,8 @@
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    vfmv.f.s fa5, v8
+; RV32-NEXT:    vslidedown.vi v9, v8, 1
+; RV32-NEXT:    vfmv.f.s fa5, v9
 ; RV32-NEXT:    lui a0, %hi(.LCPI10_0)
 ; RV32-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
 ; RV32-NEXT:    lui a0, %hi(.LCPI10_1)
@@ -202,7 +203,6 @@
 ; RV32-NEXT:    fmin.d fa5, fa5, fa3
 ; RV32-NEXT:    fcvt.w.d a2, fa5, rtz
 ; RV32-NEXT:    and a0, a0, a2
-; RV32-NEXT:    vslidedown.vi v8, v8, 1
 ; RV32-NEXT:    vfmv.f.s fa5, v8
 ; RV32-NEXT:    feq.d a2, fa5, fa5
 ; RV32-NEXT:    neg a2, a2
@@ -211,9 +211,8 @@
 ; RV32-NEXT:    fcvt.w.d a3, fa5, rtz
 ; RV32-NEXT:    and a2, a2, a3
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a2
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
-; RV32-NEXT:    vmv.s.x v8, a0
+; RV32-NEXT:    vslide1down.vx v8, v8, a2
+; RV32-NEXT:    vslide1down.vx v8, v8, a0
 ; RV32-NEXT:    vse8.v v8, (a1)
 ; RV32-NEXT:    ret
 ;
@@ -221,7 +220,8 @@
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vfmv.f.s fa5, v8
+; RV64-NEXT:    vslidedown.vi v9, v8, 1
+; RV64-NEXT:    vfmv.f.s fa5, v9
 ; RV64-NEXT:    lui a0, %hi(.LCPI10_0)
 ; RV64-NEXT:    fld fa4, %lo(.LCPI10_0)(a0)
 ; RV64-NEXT:    lui a0, %hi(.LCPI10_1)
@@ -232,7 +232,6 @@
 ; RV64-NEXT:    fmin.d fa5, fa5, fa3
 ; RV64-NEXT:    fcvt.l.d a2, fa5, rtz
 ; RV64-NEXT:    and a0, a0, a2
-; RV64-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64-NEXT:    vfmv.f.s fa5, v8
 ; RV64-NEXT:    feq.d a2, fa5, fa5
 ; RV64-NEXT:    neg a2, a2
@@ -241,9 +240,8 @@
 ; RV64-NEXT:    fcvt.l.d a3, fa5, rtz
 ; RV64-NEXT:    and a2, a2, a3
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a2
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
-; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a0
 ; RV64-NEXT:    vse8.v v8, (a1)
 ; RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
@@ -265,15 +263,16 @@
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa4, fa4, fa5
 ; RV32-NEXT:    fcvt.wu.d a0, fa4, rtz
+; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT:    vslide1down.vx v9, v8, a0
+; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV32-NEXT:    vslidedown.vi v8, v8, 1
 ; RV32-NEXT:    vfmv.f.s fa4, v8
 ; RV32-NEXT:    fmax.d fa4, fa4, fa3
 ; RV32-NEXT:    fmin.d fa5, fa4, fa5
-; RV32-NEXT:    fcvt.wu.d a2, fa5, rtz
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a2
-; RV32-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
-; RV32-NEXT:    vmv.s.x v8, a0
+; RV32-NEXT:    fcvt.wu.d a0, fa5, rtz
+; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; RV32-NEXT:    vslide1down.vx v8, v9, a0
 ; RV32-NEXT:    vse8.v v8, (a1)
 ; RV32-NEXT:    ret
 ;
@@ -288,15 +287,16 @@
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa4, fa4, fa5
 ; RV64-NEXT:    fcvt.lu.d a0, fa4, rtz
+; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT:    vslide1down.vx v9, v8, a0
+; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 1
 ; RV64-NEXT:    vfmv.f.s fa4, v8
 ; RV64-NEXT:    fmax.d fa4, fa4, fa3
 ; RV64-NEXT:    fmin.d fa5, fa4, fa5
-; RV64-NEXT:    fcvt.lu.d a2, fa5, rtz
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a2
-; RV64-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
-; RV64-NEXT:    vmv.s.x v8, a0
+; RV64-NEXT:    fcvt.lu.d a0, fa5, rtz
+; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; RV64-NEXT:    vslide1down.vx v8, v9, a0
 ; RV64-NEXT:    vse8.v v8, (a1)
 ; RV64-NEXT:    ret
   %a = load <2 x double>, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -140,11 +140,10 @@
 ; RV64-LABEL: load_v6f16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    ld a2, 0(a1)
-; RV64-NEXT:    addi a1, a1, 8
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vlse64.v v8, (a1), zero
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    ld a1, 8(a1)
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    sd a2, 0(a0)
 ; RV64-NEXT:    vslidedown.vi v8, v8, 2
 ; RV64-NEXT:    addi a0, a0, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -54,10 +54,8 @@
 ; CHECK-LABEL: buildvec_mask_nonconst_v2i1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a1
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, tu, ma
-; CHECK-NEXT:    vmv.s.x v8, a0
-; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
 ; CHECK-NEXT:    vand.vi v8, v8, 1
 ; CHECK-NEXT:    vmsne.vi v0, v8, 0
 ; CHECK-NEXT:    ret
@@ -65,10 +63,8 @@ ;
 ; ZVE32F-LABEL: buildvec_mask_nonconst_v2i1:
 ; ZVE32F:       # %bb.0:
 ; ZVE32F-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; ZVE32F-NEXT:    vmv.v.x v8, a1
-; ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, tu, ma
-; ZVE32F-NEXT:    vmv.s.x v8, a0
-; ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
 ; ZVE32F-NEXT:    vand.vi v8, v8, 1
 ; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
 ; ZVE32F-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1667,20 +1667,18 @@
 ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV32ZVE32F:       # %bb.0:
 ; RV32ZVE32F-NEXT:    lw a1, 0(a0)
-; RV32ZVE32F-NEXT:    addi a0, a0, 8
+; RV32ZVE32F-NEXT:    lw a0, 8(a0)
 ; RV32ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV32ZVE32F-NEXT:    vlse32.v v9, (a0), zero
-; RV32ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV32ZVE32F-NEXT:    vmv.s.x v9, a1
+; RV32ZVE32F-NEXT:    vslide1down.vx v9, v8, a1
+; RV32ZVE32F-NEXT:    vslide1down.vx v9, v9, a0
 ; RV32ZVE32F-NEXT:    vsoxei32.v v9, (zero), v8, v0.t
 ; RV32ZVE32F-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64ZVE32F:       # %bb.0:
 ; RV64ZVE32F-NEXT:    vsetivli zero, 2, e32, m1, ta, ma
-; RV64ZVE32F-NEXT:    vmv.v.x v8, a1
-; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
-; RV64ZVE32F-NEXT:    vmv.s.x v8, a0
+; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a0
+; RV64ZVE32F-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64ZVE32F-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a0, v0
 ; RV64ZVE32F-NEXT:    andi a1, a0, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -151,9 +151,8 @@
 ; RV64-NEXT:    slli a1, a1, 32
 ; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT:    vmv.v.x v8, a1
-; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
-; RV64-NEXT:    vmv.s.x v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a2
+; RV64-NEXT:    vslide1down.vx v8, v8, a1
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vse64.v v8, (a0)
 ; RV64-NEXT:    vslidedown.vi v8, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll
@@ -12,14 +12,12 @@
 ;
 ; CHECK-V-LABEL: test:
 ; CHECK-V:       # %bb.0:
-; CHECK-V-NEXT:    lui a1, 524288
 ; CHECK-V-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmv.v.x v8, a1
-; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, tu, ma
-; CHECK-V-NEXT:    vmv.s.x v8, a0
-; CHECK-V-NEXT:    addiw a1, a1, 2
-; CHECK-V-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-V-NEXT:    vmslt.vx v0, v8, a1
+; CHECK-V-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-V-NEXT:    lui a0, 524288
+; CHECK-V-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-V-NEXT:    addiw a0, a0, 2
+; CHECK-V-NEXT:    vmslt.vx v0, v8, a0
 ; CHECK-V-NEXT:    vmv.v.i v8, 0
 ; CHECK-V-NEXT:    vmerge.vim v8, v8, 1, v0
 ; CHECK-V-NEXT:    vslidedown.vi v8, v8, 1
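Note (not part of the patch): the input shape the new guard targets is a two-element build_vector with at least one non-constant element. A minimal, hypothetical LLVM IR sketch of such an input follows (function and value names are illustrative only); with the added `NumElts != 2` check, such a vector no longer takes the dominant-element splat-plus-insert lowering (vmv.v.x followed by a tail-undisturbed vmv.s.x under a second vsetvli) and instead lowers through the two vslide1down.vx instructions seen in the updated checks.

; Hypothetical reduced input, not taken from the patch's test files: a
; non-constant 2-element build_vector assembled from two scalar
; arguments, i.e. the case now excluded from the dominant-value path.
define <2 x i32> @buildvec_v2i32(i32 %a, i32 %b) {
  %v0 = insertelement <2 x i32> poison, i32 %a, i64 0
  %v1 = insertelement <2 x i32> %v0, i32 %b, i64 1
  ret <2 x i32> %v1
}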