diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2965,13 +2965,14 @@
       return SDValue();
     // Now we can create our integer vector type. Note that it may be larger
     // than the resulting mask type: v4i1 would use v1i8 as its integer type.
+    unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
     MVT IntegerViaVecVT =
         MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
-                         divideCeil(NumElts, NumViaIntegerBits));
+                         IntegerViaVecElts);
 
     uint64_t Bits = 0;
     unsigned BitPos = 0, IntegerEltIdx = 0;
-    SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
+    SmallVector<SDValue> Elts(IntegerViaVecElts);
 
     for (unsigned I = 0; I < NumElts;) {
       SDValue V = Op.getOperand(I);
@@ -2986,14 +2987,15 @@
         if (NumViaIntegerBits <= 32)
           Bits = SignExtend64<32>(Bits);
         SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
-        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
-                          Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
+        Elts[IntegerEltIdx] = Elt;
         Bits = 0;
         BitPos = 0;
         IntegerEltIdx++;
       }
     }
 
+    SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
+
     if (NumElts < NumViaIntegerBits) {
       // If we're producing a smaller vector than our minimum legal integer
       // type, bitcast to the equivalent (known-legal) mask type, and extract
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:    vid.v v11
 ; CHECK-NEXT:    vrgather.vv v10, v8, v11
 ; CHECK-NEXT:    li a0, 170
-; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vmv.v.x v0, a0
 ; CHECK-NEXT:    vadd.vi v8, v11, -1
 ; CHECK-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
@@ -27,7 +27,7 @@
 ; CHECK-NEXT:    vid.v v11
 ; CHECK-NEXT:    vadd.vi v12, v11, 1
 ; CHECK-NEXT:    li a0, 170
-; CHECK-NEXT:    vmv.s.x v0, a0
+; CHECK-NEXT:    vmv.v.x v0, a0
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    vrgather.vv v10, v9, v11, v0.t
 ; CHECK-NEXT:    vmv1r.v v8, v10
@@ -42,12 +42,12 @@
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT:    vid.v v11
 ; RV32-NEXT:    vrgather.vv v10, v8, v11
+; RV32-NEXT:    vadd.vi v8, v11, -1
 ; RV32-NEXT:    lui a0, 11
 ; RV32-NEXT:    addi a0, a0, -1366
 ; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
+; RV32-NEXT:    vmv.v.x v0, a0
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT:    vadd.vi v8, v11, -1
 ; RV32-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
@@ -57,12 +57,12 @@
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV64-NEXT:    vid.v v11
 ; RV64-NEXT:    vrgather.vv v10, v8, v11
+; RV64-NEXT:    vadd.vi v8, v11, -1
 ; RV64-NEXT:    lui a0, 11
 ; RV64-NEXT:    addiw a0, a0, -1366
 ; RV64-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT:    vmv.s.x v0, a0
+; RV64-NEXT:    vmv.v.x v0, a0
 ; RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT:    vadd.vi v8, v11, -1
 ; RV64-NEXT:    vrgather.vv v10, v9, v8, v0.t
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
@@ -80,7 +80,7 @@
 ; RV32-NEXT:    lui a0, 11
 ; RV32-NEXT:    addi a0, a0, -1366
 ; RV32-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT:    vmv.s.x v0, a0
+; RV32-NEXT:    vmv.v.x v0, a0
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
 ; RV32-NEXT:    vrgather.vv v10, v9, v11, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v10
@@ -95,7 +95,7 @@
 ; RV64-NEXT:    lui a0, 11
 ; RV64-NEXT:
addiw a0, a0, -1366 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-NEXT: vmv.v.v v8, v10 @@ -110,8 +110,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -126,8 +125,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -143,7 +141,7 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -159,7 +157,7 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -181,9 +179,8 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-LABEL: trn2.v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -198,8 +195,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -214,8 +210,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -237,9 +232,8 @@ define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ; CHECK-LABEL: trn2.v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -261,9 +255,8 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { ; CHECK-LABEL: trn2.v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -278,8 +271,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -294,8 +286,7 @@ ; 
CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -317,9 +308,8 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { ; CHECK-LABEL: trn2.v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -334,8 +324,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -350,8 +339,7 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 -; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -367,7 +355,7 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 170 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -383,7 +371,7 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 170 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -58,7 +58,7 @@ ; V-NEXT: lui a3, 983765 ; V-NEXT: addiw a3, a3, 873 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; V-NEXT: vmv.s.x v0, a3 +; V-NEXT: vmv.v.x v0, a3 ; V-NEXT: li a3, 32 ; V-NEXT: li a4, 5 ; V-NEXT: .LBB1_1: # %vector.body @@ -82,7 +82,7 @@ ; ZVE32F-NEXT: lui a3, 983765 ; ZVE32F-NEXT: addiw a3, a3, 873 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a3 +; ZVE32F-NEXT: vmv.v.x v0, a3 ; ZVE32F-NEXT: li a3, 32 ; ZVE32F-NEXT: li a4, 5 ; ZVE32F-NEXT: .LBB1_1: # %vector.body @@ -333,7 +333,7 @@ ; V-NEXT: lui a4, 983765 ; V-NEXT: addiw a4, a4, 873 ; V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; V-NEXT: vmv.s.x v0, a4 +; V-NEXT: vmv.v.x v0, a4 ; V-NEXT: li a4, 5 ; V-NEXT: .LBB6_1: # %vector.body ; V-NEXT: # =>This Inner Loop Header: Depth=1 @@ -357,7 +357,7 @@ ; ZVE32F-NEXT: lui a4, 983765 ; ZVE32F-NEXT: addiw a4, a4, 873 ; ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a4 +; ZVE32F-NEXT: vmv.v.x v0, a4 ; ZVE32F-NEXT: li a4, 5 ; ZVE32F-NEXT: .LBB6_1: # %vector.body ; ZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1395,30 
+1395,30 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t -; RV32-NEXT: vsrl.vi v11, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v11, v11, a4, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: li a5, 5 +; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vmv.v.i v13, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v13, v13, a5, v0 +; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vand.vv v12, v12, v13, v0.t -; RV32-NEXT: vor.vv v11, v12, v11, v0.t +; RV32-NEXT: vand.vv v11, v11, v12, v0.t +; RV32-NEXT: vsrl.vi v13, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v13, v13, a4, v0.t +; RV32-NEXT: vor.vv v11, v11, v13, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsll.vx v11, v8, a1, v0.t -; RV32-NEXT: vand.vx v12, v8, a3, v0.t -; RV32-NEXT: vsll.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v11, v11, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v13, v0.t +; RV32-NEXT: vand.vx v13, v8, a3, v0.t +; RV32-NEXT: vsll.vx v13, v13, a2, v0.t +; RV32-NEXT: vor.vv v11, v11, v13, v0.t +; RV32-NEXT: vand.vx v13, v8, a4, v0.t +; RV32-NEXT: vsll.vi v13, v13, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t +; RV32-NEXT: vor.vv v8, v13, v8, v0.t ; RV32-NEXT: vor.vv v8, v11, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t @@ -1528,30 +1528,30 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: li a4, 5 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vmv.v.i v11, 0 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v11, v11, a4, v0 +; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v10, v11 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vv v11, v11, v10 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vsll.vx v11, v8, a1 ; RV32-NEXT: vand.vx v12, v8, a3 ; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1665,30 +1665,31 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: 
vsrl.vi v14, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v14, a4, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: li a5, 85 +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vmv.v.i v18, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v16, v16, v18, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v18, v18, a4, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a4, v0.t +; RV32-NEXT: vsll.vi v18, v18, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v18, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t @@ -1798,30 +1799,31 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vmv.v.i v14, 0 +; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v12, v14 +; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vv v14, v14, v12 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v14, v10 +; RV32-NEXT: vsll.vx v14, v8, a1 ; RV32-NEXT: vand.vx v16, v8, a3 ; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v14, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -1934,34 +1936,35 @@ ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v16, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx 
v24, v20, a4, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t -; RV32-NEXT: lui a5, 5 -; RV32-NEXT: addi a5, a5, 1365 +; RV32-NEXT: vor.vv v20, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: lui a4, 5 +; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vmv.v.i v20, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v28, v28, v20, v0.t -; RV32-NEXT: vor.vv v24, v28, v24, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v28, v28, a4, v0.t +; RV32-NEXT: vor.vv v24, v24, v28, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t ; RV32-NEXT: vand.vx v28, v8, a4, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v20, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -2069,15 +2072,16 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: lui a4, 5 ; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: vand.vv v20, v20, v16 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a4, 4080 @@ -2088,11 +2092,11 @@ ; RV32-NEXT: vand.vx v24, v8, a3 ; RV32-NEXT: vsll.vx v24, v24, a2 ; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vand.vx v24, v8, a4 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 @@ -2200,140 +2204,245 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; 
RV32-NEXT: vsll.vx v24, v8, a3, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a5, a1, -256 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: addi a4, a1, -256 +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsll.vx v24, v24, a5, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: lui a7, 1044480 -; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; 
RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v8, a5, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2427,73 +2536,70 @@ ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 
0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: lui a6, 1044480 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: lui a3, 1044480 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a3, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: lui a6, 4080 ; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a6 +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: li a4, 56 +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsrl.vx v0, v8, a5 +; RV32-NEXT: lui a6, 16 +; RV32-NEXT: addi a6, a6, -256 +; RV32-NEXT: vand.vx v0, v0, a6 +; RV32-NEXT: vsrl.vx v24, v8, a4 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vx v8, v8, a6 -; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vsll.vx v0, v8, a4 +; RV32-NEXT: vand.vx v8, v8, a6 +; RV32-NEXT: vsll.vx v8, v8, a5 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv 
v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -2578,140 +2684,245 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: li a4, 40 -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vx v24, v8, a3, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a5, a1, -256 -; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: addi a4, a1, -256 +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsll.vx v24, v24, a5, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a6, 4080 -; RV32-NEXT: vand.vx v24, v24, a6, v0.t +; RV32-NEXT: vand.vx v24, v8, a6, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, 349525 -; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: lui a7, 1044480 -; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vmerge.vxm v16, v24, a7, v0 -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vs8r.v v16, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 
+; RV32-NEXT: addi a7, sp, 16 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a7, sp, 16 -; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: csrr a7, vlenb -; RV32-NEXT: li t0, 24 -; RV32-NEXT: mul a7, a7, t0 -; RV32-NEXT: add a7, sp, a7 -; RV32-NEXT: addi a7, a7, 16 -; RV32-NEXT: vl8r.v v16, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 3 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vl8r.v v24, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a7, vlenb ; RV32-NEXT: slli a7, a7, 4 ; RV32-NEXT: add a7, sp, a7 ; RV32-NEXT: addi a7, a7, 16 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v8, a5, v0.t -; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a6, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a5, v0.t +; RV32-NEXT: vand.vx v16, v24, a4, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a6, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 4, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 4, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 
61681 ; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t -; RV32-NEXT: vor.vv v24, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v24, 1, v0.t -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: 
vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v24, v8, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2805,73 +3016,70 @@ ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: lui a6, 1044480 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: lui a3, 1044480 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a3, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: lui a6, 4080 ; RV32-NEXT: vsrl.vi v0, v8, 24 -; RV32-NEXT: vand.vx v0, v0, a6 +; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: li a4, 56 +; RV32-NEXT: li a5, 40 +; RV32-NEXT: vsrl.vx v0, v8, a5 +; RV32-NEXT: lui a6, 16 +; RV32-NEXT: addi a6, a6, -256 +; RV32-NEXT: vand.vx v0, v0, a6 +; RV32-NEXT: vsrl.vx v24, v8, a4 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vx v8, v8, a6 -; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vsll.vx v0, v8, a4 +; RV32-NEXT: vand.vx v8, v8, a6 +; RV32-NEXT: vsll.vx v8, v8, a5 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: vor.vv v8, v8, 
v16 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: lui a1, 61681 -; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a1 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -165,39 +165,38 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: lui a1, 1044480 +; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v11 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: vand.vv v10, v10, v9 +; RV32-NEXT: vsrl.vi v11, v8, 24 +; RV32-NEXT: lui a1, 4080 +; RV32-NEXT: vand.vx v11, v11, a1 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsrl.vx v11, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: lui a4, 16 +; RV32-NEXT: addi a4, a4, -256 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: vsll.vi 
v9, v9, 8 +; RV32-NEXT: vand.vx v11, v8, a1 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vx v8, v8, a3 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -697,39 +696,40 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: li a2, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2 -; LMULMAX2-RV32-NEXT: lui a3, 16 -; LMULMAX2-RV32-NEXT: addi a3, a3, -256 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV32-NEXT: li a5, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 +; LMULMAX2-RV32-NEXT: li a1, 85 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV32-NEXT: lui a5, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 -; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v10 +; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 24 +; LMULMAX2-RV32-NEXT: lui a1, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v14, v14, a1 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: li a2, 56 +; LMULMAX2-RV32-NEXT: vsrl.vx v14, v8, a2 +; LMULMAX2-RV32-NEXT: li a3, 40 +; LMULMAX2-RV32-NEXT: vsrl.vx v16, v8, a3 +; LMULMAX2-RV32-NEXT: lui a4, 16 +; LMULMAX2-RV32-NEXT: addi a4, a4, -256 +; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 +; LMULMAX2-RV32-NEXT: vor.vv v14, v16, v14 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v10 +; LMULMAX2-RV32-NEXT: vsll.vi v10, v10, 8 +; LMULMAX2-RV32-NEXT: vand.vx v14, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 24 +; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a2 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a3 +; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 ; LMULMAX2-RV32-NEXT: addi a1, a1, -241 @@ -828,41 +828,40 @@ ; LMULMAX1-RV32-LABEL: bitreverse_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; 
LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v10, a2 -; LMULMAX1-RV32-NEXT: li a3, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v10, a3 -; LMULMAX1-RV32-NEXT: lui a4, 16 -; LMULMAX1-RV32-NEXT: addi a4, a4, -256 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v10, 24 -; LMULMAX1-RV32-NEXT: lui a5, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v12, v9, a5 -; LMULMAX1-RV32-NEXT: li a6, 5 -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX1-RV32-NEXT: lui a6, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a6, v0 +; LMULMAX1-RV32-NEXT: lui a2, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v9, v9, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v10, 8 -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 8 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v10, 24 +; LMULMAX1-RV32-NEXT: lui a2, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: li a3, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v10, a3 +; LMULMAX1-RV32-NEXT: li a4, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v13, v10, a4 +; LMULMAX1-RV32-NEXT: lui a5, 16 +; LMULMAX1-RV32-NEXT: addi a5, a5, -256 +; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 ; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v12, v11 -; LMULMAX1-RV32-NEXT: vsll.vx v12, v10, a2 -; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 -; LMULMAX1-RV32-NEXT: vor.vv v12, v12, v13 -; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a5 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: vand.vv v12, v10, v9 +; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 8 +; LMULMAX1-RV32-NEXT: vand.vx v13, v10, a2 ; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v9 -; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 8 +; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v10, a3 +; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a5 +; LMULMAX1-RV32-NEXT: vsll.vx v10, v10, a4 ; LMULMAX1-RV32-NEXT: vor.vv v10, v13, v10 -; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v12 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v10, 4 ; LMULMAX1-RV32-NEXT: lui a6, 61681 @@ -894,26 +893,26 @@ ; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v14 ; LMULMAX1-RV32-NEXT: vadd.vv v10, v10, v10 ; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsrl.vx v15, v8, a3 -; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a4 -; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 8 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v9 ; LMULMAX1-RV32-NEXT: vsrl.vi v15, v8, 24 -; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v16, v16, v9 +; LMULMAX1-RV32-NEXT: vand.vx v15, v15, a2 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v15 +; LMULMAX1-RV32-NEXT: vsrl.vx v15, v8, a3 +; LMULMAX1-RV32-NEXT: vsrl.vx v16, v8, a4 +; LMULMAX1-RV32-NEXT: vand.vx v16, v16, a5 ; LMULMAX1-RV32-NEXT: vor.vv v15, v16, v15 -; LMULMAX1-RV32-NEXT: vor.vv v11, v15, v11 -; LMULMAX1-RV32-NEXT: 
vsll.vx v15, v8, a2 -; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v16, v16, a3 -; LMULMAX1-RV32-NEXT: vor.vv v15, v15, v16 -; LMULMAX1-RV32-NEXT: vand.vx v16, v8, a5 -; LMULMAX1-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v15 +; LMULMAX1-RV32-NEXT: vand.vv v9, v8, v9 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 +; LMULMAX1-RV32-NEXT: vand.vx v15, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v15, v15, 24 +; LMULMAX1-RV32-NEXT: vor.vv v9, v15, v9 +; LMULMAX1-RV32-NEXT: vsll.vx v15, v8, a3 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a4 ; LMULMAX1-RV32-NEXT: vor.vv v8, v15, v8 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v9 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 4 ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -435,30 +435,30 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v11, v11, a3, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t -; RV32-NEXT: vsrl.vi v11, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v11, v11, a4, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 8, v0.t -; RV32-NEXT: li a5, 5 +; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vmv.v.i v13, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v13, v13, a5, v0 +; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vand.vv v12, v12, v13, v0.t -; RV32-NEXT: vor.vv v11, v12, v11, v0.t +; RV32-NEXT: vand.vv v11, v11, v12, v0.t +; RV32-NEXT: vsrl.vi v13, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v13, v13, a0, v0.t +; RV32-NEXT: vor.vv v11, v11, v13, v0.t ; RV32-NEXT: vor.vv v10, v11, v10, v0.t ; RV32-NEXT: vsll.vx v11, v8, a1, v0.t -; RV32-NEXT: vand.vx v12, v8, a3, v0.t -; RV32-NEXT: vsll.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v11, v11, v12, v0.t -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v13, v0.t +; RV32-NEXT: vand.vx v13, v8, a3, v0.t +; RV32-NEXT: vsll.vx v13, v13, a2, v0.t +; RV32-NEXT: vor.vv v11, v11, v13, v0.t +; RV32-NEXT: vand.vx v13, v8, a0, v0.t +; RV32-NEXT: vsll.vi v13, v13, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v12, v8, v0.t +; RV32-NEXT: vor.vv v8, v13, v8, v0.t ; RV32-NEXT: vor.vv v8, v11, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: ret @@ -511,30 +511,30 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: li a4, 5 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vmv.v.i v11, 0 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v11, v11, a4, v0 +; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, 
ma -; RV32-NEXT: vand.vv v10, v10, v11 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: vand.vv v11, v11, v10 ; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vsll.vx v11, v8, a1 ; RV32-NEXT: vand.vx v12, v8, a3 ; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v11, v11, v12 +; RV32-NEXT: vand.vv v10, v8, v10 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: ret ; @@ -591,30 +591,31 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v14, v14, a3, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t -; RV32-NEXT: vsrl.vi v14, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v14, v14, a4, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: li a5, 85 +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t +; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vmv.v.i v18, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v18, v18, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vand.vv v16, v16, v18, v0.t -; RV32-NEXT: vor.vv v14, v16, v14, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vsrl.vi v18, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v18, v18, a0, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t ; RV32-NEXT: vor.vv v12, v14, v12, v0.t ; RV32-NEXT: vsll.vx v14, v8, a1, v0.t -; RV32-NEXT: vand.vx v16, v8, a3, v0.t -; RV32-NEXT: vsll.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v14, v14, v16, v0.t -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a3, v0.t +; RV32-NEXT: vsll.vx v18, v18, a2, v0.t +; RV32-NEXT: vor.vv v14, v14, v18, v0.t +; RV32-NEXT: vand.vx v18, v8, a0, v0.t +; RV32-NEXT: vsll.vi v18, v18, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v18, v8, v0.t ; RV32-NEXT: vor.vv v8, v14, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: ret @@ -667,30 +668,31 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: vmv.v.i v14, 0 +; RV32-NEXT: vmv.v.i v12, 0 ; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; RV32-NEXT: vmerge.vxm v12, v12, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v12, v14 +; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: vand.vv v14, v14, v12 ; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: lui a0, 4080 ; 
RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vor.vv v10, v14, v10 +; RV32-NEXT: vsll.vx v14, v8, a1 ; RV32-NEXT: vand.vx v16, v8, a3 ; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v14, v14, v16 +; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v14, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: ret ; @@ -746,34 +748,35 @@ ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v20, v20, a3, v0.t -; RV32-NEXT: vor.vv v16, v20, v16, v0.t -; RV32-NEXT: vsrl.vi v20, v8, 24, v0.t -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v20, a4, v0.t -; RV32-NEXT: vsrl.vi v28, v8, 8, v0.t -; RV32-NEXT: lui a5, 5 -; RV32-NEXT: addi a5, a5, 1365 +; RV32-NEXT: vor.vv v20, v20, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: lui a4, 5 +; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vmv.v.i v20, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v20, v20, a5, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vand.vv v28, v28, v20, v0.t -; RV32-NEXT: vor.vv v24, v28, v24, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v28, v8, 24, v0.t +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v28, v28, a0, v0.t +; RV32-NEXT: vor.vv v24, v24, v28, v0.t +; RV32-NEXT: vor.vv v20, v24, v20, v0.t ; RV32-NEXT: vsll.vx v24, v8, a1, v0.t ; RV32-NEXT: vand.vx v28, v8, a3, v0.t ; RV32-NEXT: vsll.vx v28, v28, a2, v0.t ; RV32-NEXT: vor.vv v24, v24, v28, v0.t -; RV32-NEXT: vand.vx v28, v8, a4, v0.t +; RV32-NEXT: vand.vx v28, v8, a0, v0.t ; RV32-NEXT: vsll.vi v28, v28, 24, v0.t -; RV32-NEXT: vand.vv v8, v8, v20, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 8, v0.t ; RV32-NEXT: vor.vv v8, v28, v8, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v20, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v8i64: @@ -824,15 +827,16 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: lui a4, 5 ; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: lui a4, 1044480 ; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: vand.vv v20, v20, v16 ; RV32-NEXT: vsrl.vi v24, v8, 24 ; RV32-NEXT: lui a0, 4080 @@ -843,11 +847,11 @@ ; RV32-NEXT: vand.vx v24, v8, a3 ; RV32-NEXT: vsll.vx v24, v24, a2 ; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: 
vand.vv v8, v8, v16 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: ret @@ -898,112 +902,127 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill -; RV32-NEXT: lui a5, 349525 -; RV32-NEXT: addi a5, a5, 1365 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: li a6, 32 -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vsetvli zero, a6, e32, m8, ta, ma -; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: lui a6, 349525 +; RV32-NEXT: addi a6, a6, 1365 ; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vmerge.vxm v16, v24, a5, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: lui a7, 1044480 +; RV32-NEXT: vmv.v.x v0, a6 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; 
RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1070,50 +1089,47 @@ ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: 
li a1, 56 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: lui a2, 1044480 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a2, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vand.vx v0, v0, a0 ; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v0, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vand.vx v0, v8, a0 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vsll.vx v0, v8, a1 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vsll.vx v8, v8, a2 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1166,112 +1182,127 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 
4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v24, v8, a2, v0.t +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v24, v24, a3, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: slli a4, a4, 4 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill -; RV32-NEXT: lui a5, 349525 -; RV32-NEXT: addi a5, a5, 1365 -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: addi a6, sp, 16 -; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: li a6, 32 -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: vsetvli zero, a6, e32, m8, ta, ma -; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: li a5, 32 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: lui a6, 349525 +; RV32-NEXT: addi a6, a6, 1365 ; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vmerge.vxm v16, v24, a5, v0 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: lui a7, 1044480 +; RV32-NEXT: vmv.v.x v0, a6 +; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v24, v24, a7, v0 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a0, a0, a5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v24, v24, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; 
RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: vand.vx v24, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v16, v24, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t +; RV32-NEXT: vand.vx v8, v8, a4, v0.t ; RV32-NEXT: vor.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1338,50 +1369,47 @@ ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: vmv.s.x v0, a4 -; RV32-NEXT: li a4, 32 -; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; RV32-NEXT: lui a4, 1044480 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: lui a2, 1044480 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a2, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: vand.vv v24, v24, v16 -; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: lui a0, 4080 
; RV32-NEXT: vand.vx v0, v0, a0 ; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v0, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v0, v0, a3 +; RV32-NEXT: vsrl.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vand.vx v0, v8, a0 +; RV32-NEXT: vsll.vi v0, v0, 24 +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vsll.vx v0, v8, a1 +; RV32-NEXT: vand.vx v8, v8, a3 +; RV32-NEXT: vsll.vx v8, v8, a2 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -71,39 +71,38 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 24 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: li a5, 5 -; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: lui a5, 1044480 -; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: lui a1, 1044480 +; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: vand.vv v12, v12, v11 -; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: vand.vv v10, v10, v9 +; RV32-NEXT: vsrl.vi v11, v8, 24 +; RV32-NEXT: lui a1, 4080 +; RV32-NEXT: vand.vx v11, v11, a1 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsrl.vx v11, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: lui a4, 16 +; RV32-NEXT: addi a4, a4, -256 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vand.vv v9, v8, v9 +; RV32-NEXT: vsll.vi v9, v9, 8 +; RV32-NEXT: vand.vx v11, v8, a1 +; RV32-NEXT: vsll.vi v11, v11, 24 +; 
RV32-NEXT: vor.vv v9, v11, v9 +; RV32-NEXT: vsll.vx v11, v8, a2 +; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vsll.vx v8, v8, a3 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -318,39 +317,40 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 56 -; LMULMAX2-RV32-NEXT: vsrl.vx v10, v8, a1 -; LMULMAX2-RV32-NEXT: li a2, 40 -; LMULMAX2-RV32-NEXT: vsrl.vx v12, v8, a2 -; LMULMAX2-RV32-NEXT: lui a3, 16 -; LMULMAX2-RV32-NEXT: addi a3, a3, -256 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV32-NEXT: li a5, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5 +; LMULMAX2-RV32-NEXT: li a1, 85 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV32-NEXT: lui a5, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 -; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 -; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 -; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 +; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v10 +; LMULMAX2-RV32-NEXT: vsrl.vi v14, v8, 24 +; LMULMAX2-RV32-NEXT: lui a1, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v14, v14, a1 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: li a2, 56 +; LMULMAX2-RV32-NEXT: vsrl.vx v14, v8, a2 +; LMULMAX2-RV32-NEXT: li a3, 40 +; LMULMAX2-RV32-NEXT: vsrl.vx v16, v8, a3 +; LMULMAX2-RV32-NEXT: lui a4, 16 +; LMULMAX2-RV32-NEXT: addi a4, a4, -256 +; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 +; LMULMAX2-RV32-NEXT: vor.vv v14, v16, v14 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vand.vv v10, v8, v10 +; LMULMAX2-RV32-NEXT: vsll.vi v10, v10, 8 +; LMULMAX2-RV32-NEXT: vand.vx v14, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 24 +; LMULMAX2-RV32-NEXT: vor.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vsll.vx v14, v8, a2 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vx v8, v8, a3 +; LMULMAX2-RV32-NEXT: vor.vv v8, v14, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -392,65 +392,64 @@ ; LMULMAX1-RV32-LABEL: bswap_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v8, a2 -; 
LMULMAX1-RV32-NEXT: li a3, 40 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v8, a3 -; LMULMAX1-RV32-NEXT: lui a4, 16 -; LMULMAX1-RV32-NEXT: addi a4, a4, -256 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 -; LMULMAX1-RV32-NEXT: lui a5, 4080 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: li a6, 5 -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a6 +; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v12, 0 -; LMULMAX1-RV32-NEXT: lui a6, 1044480 -; LMULMAX1-RV32-NEXT: vmerge.vxm v12, v12, a6, v0 +; LMULMAX1-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX1-RV32-NEXT: lui a2, 1044480 +; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v8, 8 -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v13 -; LMULMAX1-RV32-NEXT: vand.vx v13, v8, a5 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 8 +; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 24 +; LMULMAX1-RV32-NEXT: lui a2, 4080 +; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: li a3, 56 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v9, a3 +; LMULMAX1-RV32-NEXT: li a4, 40 +; LMULMAX1-RV32-NEXT: vsrl.vx v13, v9, a4 +; LMULMAX1-RV32-NEXT: lui a5, 16 +; LMULMAX1-RV32-NEXT: addi a5, a5, -256 +; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 +; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v10 +; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 8 +; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a2 ; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v13, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vx v10, v9, a2 -; LMULMAX1-RV32-NEXT: vsrl.vx v11, v9, a3 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a4 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 -; LMULMAX1-RV32-NEXT: vand.vx v11, v11, a5 -; LMULMAX1-RV32-NEXT: vsrl.vi v13, v9, 8 -; LMULMAX1-RV32-NEXT: vand.vv v13, v13, v12 -; LMULMAX1-RV32-NEXT: vor.vv v11, v13, v11 -; LMULMAX1-RV32-NEXT: vor.vv v10, v11, v10 -; LMULMAX1-RV32-NEXT: vsll.vx v11, v9, a2 -; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a4 -; LMULMAX1-RV32-NEXT: vsll.vx v13, v13, a3 -; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v13 -; LMULMAX1-RV32-NEXT: vand.vx v13, v9, a5 -; LMULMAX1-RV32-NEXT: vsll.vi v13, v13, 24 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 +; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 +; LMULMAX1-RV32-NEXT: vsll.vx v13, v9, a3 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a5 +; LMULMAX1-RV32-NEXT: vsll.vx v9, v9, a4 ; LMULMAX1-RV32-NEXT: vor.vv v9, v13, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 8 +; 
LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 24 +; LMULMAX1-RV32-NEXT: vand.vx v12, v12, a2 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: vsrl.vx v12, v8, a3 +; LMULMAX1-RV32-NEXT: vsrl.vx v13, v8, a4 +; LMULMAX1-RV32-NEXT: vand.vx v13, v13, a5 +; LMULMAX1-RV32-NEXT: vor.vv v12, v13, v12 +; LMULMAX1-RV32-NEXT: vor.vv v11, v11, v12 +; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v10 +; LMULMAX1-RV32-NEXT: vsll.vi v10, v10, 8 +; LMULMAX1-RV32-NEXT: vand.vx v12, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v12, v12, 24 +; LMULMAX1-RV32-NEXT: vor.vv v10, v12, v10 +; LMULMAX1-RV32-NEXT: vsll.vx v12, v8, a3 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a5 +; LMULMAX1-RV32-NEXT: vsll.vx v8, v8, a4 +; LMULMAX1-RV32-NEXT: vor.vv v8, v12, v8 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: bswap_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -35,9 +35,8 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, <8 x float> %y) optsize { ; LMULMAX1-LABEL: hang_when_merging_stores_after_legalization: ; LMULMAX1: # %bb.0: -; LMULMAX1-NEXT: li a0, 2 ; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; LMULMAX1-NEXT: vmv.s.x v0, a0 +; LMULMAX1-NEXT: vmv.v.i v0, 2 ; LMULMAX1-NEXT: vrgather.vi v12, v8, 0 ; LMULMAX1-NEXT: vrgather.vi v12, v9, 3, v0.t ; LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma @@ -49,14 +48,15 @@ ; ; LMULMAX2-LABEL: hang_when_merging_stores_after_legalization: ; LMULMAX2: # %bb.0: -; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vid.v v12 ; LMULMAX2-NEXT: li a0, 7 ; LMULMAX2-NEXT: vmul.vx v14, v12, a0 ; LMULMAX2-NEXT: vrgather.vv v12, v8, v14 -; LMULMAX2-NEXT: li a0, 12 -; LMULMAX2-NEXT: vmv.s.x v0, a0 ; LMULMAX2-NEXT: vadd.vi v8, v14, -14 +; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-NEXT: vmv.v.i v0, 12 +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vrgather.vv v12, v10, v8, v0.t ; LMULMAX2-NEXT: vmv1r.v v8, v12 ; LMULMAX2-NEXT: ret @@ -150,10 +150,9 @@ define void @buildvec_merge0_v4f32(<4 x float>* %x, float %f) { ; CHECK-LABEL: buildvec_merge0_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 6 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: lui a1, 262144 ; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 ; CHECK-NEXT: vse32.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -42,10 +42,11 @@ ; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-V128-NEXT: vid.v v9 ; RV32-V128-NEXT: vsrl.vi v14, v9, 1 -; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14 -; RV32-V128-NEXT: li a0, 10 -; RV32-V128-NEXT: vmv.s.x v0, a0 +; RV32-V128-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-V128-NEXT: vmv.v.i v0, 10 +; 
RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t ; RV32-V128-NEXT: vmv.v.v v8, v10 ; RV32-V128-NEXT: ret @@ -53,12 +54,13 @@ ; RV64-V128-LABEL: interleave_v2f64: ; RV64-V128: # %bb.0: ; RV64-V128-NEXT: vmv1r.v v12, v9 -; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu +; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-V128-NEXT: vid.v v10 ; RV64-V128-NEXT: vsrl.vi v14, v10, 1 ; RV64-V128-NEXT: vrgather.vv v10, v8, v14 -; RV64-V128-NEXT: li a0, 10 -; RV64-V128-NEXT: vmv.s.x v0, a0 +; RV64-V128-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-V128-NEXT: vmv.v.i v0, 10 +; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; RV64-V128-NEXT: vrgather.vv v10, v12, v14, v0.t ; RV64-V128-NEXT: vmv.v.v v8, v10 ; RV64-V128-NEXT: ret @@ -69,8 +71,7 @@ ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: li a0, 10 -; RV32-V512-NEXT: vmv.s.x v0, a0 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 @@ -81,8 +82,7 @@ ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: li a0, 10 -; RV64-V512-NEXT: vmv.s.x v0, a0 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 @@ -255,56 +255,48 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 -; RV32-V128-NEXT: add a0, sp, a0 -; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-V128-NEXT: vle32.v v24, (a0) +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-V128-NEXT: vle32.v v0, (a0) +; RV32-V128-NEXT: vmv8r.v v24, v8 +; RV32-V128-NEXT: addi a0, sp, 16 +; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vrgather.vv v8, v24, v0 ; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV32-V128-NEXT: vle32.v v16, (a0) +; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, 699051 ; RV32-V128-NEXT: addi a0, a0, -1366 -; RV32-V128-NEXT: vmv.s.x v0, a0 -; RV32-V128-NEXT: vrgather.vv v16, v8, v24 -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 -; RV32-V128-NEXT: add a0, sp, a0 -; 
RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-V128-NEXT: vmv.v.x v0, a0 +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-V128-NEXT: vmv.v.v v24, v8 ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV32-V128-NEXT: vmv8r.v v8, v0 +; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -314,56 +306,48 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 -; RV64-V128-NEXT: add a0, sp, a0 -; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-V128-NEXT: vle32.v v24, (a0) +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-V128-NEXT: vle32.v v0, (a0) +; RV64-V128-NEXT: vmv8r.v v24, v8 +; RV64-V128-NEXT: addi a0, sp, 16 +; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vrgather.vv v8, v24, v0 ; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV64-V128-NEXT: vle32.v v16, (a0) +; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, 699051 ; RV64-V128-NEXT: addiw a0, a0, -1366 -; RV64-V128-NEXT: vmv.s.x v0, a0 -; RV64-V128-NEXT: vrgather.vv v16, v8, v24 -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 -; RV64-V128-NEXT: add a0, sp, a0 -; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-V128-NEXT: vmv.v.x v0, a0 +; RV64-V128-NEXT: vsetvli zero, a1, 
e32, m8, ta, mu ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-V128-NEXT: vmv.v.v v24, v8 ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV64-V128-NEXT: vmv8r.v v8, v0 +; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -5,9 +5,9 @@ define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) { ; CHECK-LABEL: shuffle_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 11 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 11 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x half> %x, <4 x half> %y, <4 x i32> @@ -18,8 +18,9 @@ ; CHECK-LABEL: shuffle_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 236 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> @@ -27,49 +28,29 @@ } define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) { -; RV32-LABEL: shuffle_fv_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 9 -; RV32-NEXT: lui a1, %hi(.LCPI2_0) -; RV32-NEXT: fld fa5, %lo(.LCPI2_0)(a1) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 -; RV32-NEXT: ret -; -; RV64-LABEL: shuffle_fv_v4f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI2_0) -; RV64-NEXT: fld fa5, %lo(.LCPI2_0)(a0) -; RV64-NEXT: li a0, 9 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vmv.s.x v0, a0 -; RV64-NEXT: vfmerge.vfm v8, v8, fa5, v0 -; RV64-NEXT: ret +; CHECK-LABEL: shuffle_fv_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 9 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; CHECK-NEXT: ret %s = shufflevector <4 x double> , <4 x double> %x, <4 x i32> ret <4 x double> %s } define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) { -; RV32-LABEL: shuffle_vf_v4f64: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 6 -; RV32-NEXT: lui a1, %hi(.LCPI3_0) -; RV32-NEXT: fld fa5, %lo(.LCPI3_0)(a1) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 -; RV32-NEXT: ret -; -; 
RV64-LABEL: shuffle_vf_v4f64:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a0, %hi(.LCPI3_0)
-; RV64-NEXT: fld fa5, %lo(.LCPI3_0)(a0)
-; RV64-NEXT: li a0, 6
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vfmerge.vfm v8, v8, fa5, v0
-; RV64-NEXT: ret
+; CHECK-LABEL: shuffle_vf_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a0)
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 6
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
%s = shufflevector <4 x double> %x, <4 x double> , <4 x i32>
ret <4 x double> %s
}
@@ -127,11 +108,12 @@
; RV32: # %bb.0:
; RV32-NEXT: lui a0, %hi(.LCPI6_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI6_0)
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vle16.v v14, (a0)
-; RV32-NEXT: li a0, 8
-; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vrgatherei16.vv v12, v8, v14
+; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-NEXT: vmv.v.i v0, 8
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT: vrgather.vi v12, v10, 1, v0.t
; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
@@ -140,11 +122,12 @@
; RV64: # %bb.0:
; RV64-NEXT: lui a0, %hi(.LCPI6_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI6_0)
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vle64.v v14, (a0)
-; RV64-NEXT: li a0, 8
-; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vrgather.vv v12, v8, v14
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.i v0, 8
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vrgather.vi v12, v10, 1, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -155,14 +138,13 @@
define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
; RV32-LABEL: vrgather_shuffle_xv_v4f64:
; RV32: # %bb.0:
-; RV32-NEXT: li a0, 12
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vid.v v12
; RV32-NEXT: lui a0, %hi(.LCPI7_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0)
; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vid.v v12
; RV32-NEXT: vrsub.vi v12, v12, 4
+; RV32-NEXT: vmv.v.i v0, 12
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
@@ -170,14 +152,15 @@
;
; RV64-LABEL: vrgather_shuffle_xv_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: vrsub.vi v12, v10, 4
; RV64-NEXT: lui a0, %hi(.LCPI7_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0)
; RV64-NEXT: vlse64.v v10, (a0), zero
-; RV64-NEXT: li a0, 12
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vrsub.vi v12, v12, 4
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.i v0, 12
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vrgather.vv v10, v8, v12, v0.t
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
@@ -195,7 +178,7 @@
; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0)
; RV32-NEXT: vlse64.v v10, (a1), zero
; RV32-NEXT: vmul.vx v12, v12, a0
-; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vmv.v.i v0, 3
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; RV32-NEXT: vrgatherei16.vv v10, v8, v12, v0.t
; RV32-NEXT: vmv.v.v v8, v10
@@ -203,14 +186,16 @@
;
; RV64-LABEL: vrgather_shuffle_vx_v4f64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; RV64-NEXT: vid.v v12
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: li a0, 3
+; RV64-NEXT: vmul.vx v12, v10, a0
; RV64-NEXT: lui a0, %hi(.LCPI8_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI8_0)
; RV64-NEXT: vlse64.v v10, (a0), zero
-; RV64-NEXT: li a0, 3
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vmul.vx v12, v12, a0
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.i v0, 3
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vrgather.vv v10, v8, v12, v0.t
; RV64-NEXT: vmv.v.v v8, v10
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -450,13 +450,13 @@
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 73
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vmv.v.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vmv.v.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -55,10 +55,11 @@
; RV32-V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-V128-NEXT: vid.v v9
; RV32-V128-NEXT: vsrl.vi v14, v9, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, mu
+; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v14
-; RV32-V128-NEXT: li a0, 10
-; RV32-V128-NEXT: vmv.s.x v0, a0
+; RV32-V128-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32-V128-NEXT: vmv.v.i v0, 10
+; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-V128-NEXT: vrgatherei16.vv v10, v12, v14, v0.t
; RV32-V128-NEXT: vmv.v.v v8, v10
; RV32-V128-NEXT: ret
@@ -66,12 +67,13 @@
; RV64-V128-LABEL: interleave_v2i64:
; RV64-V128: # %bb.0:
; RV64-V128-NEXT: vmv1r.v v12, v9
-; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-V128-NEXT: vid.v v10
; RV64-V128-NEXT: vsrl.vi v14, v10, 1
; RV64-V128-NEXT: vrgather.vv v10, v8, v14
-; RV64-V128-NEXT: li a0, 10
-; RV64-V128-NEXT: vmv.s.x v0, a0
+; RV64-V128-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-V128-NEXT: vmv.v.i v0, 10
+; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-V128-NEXT: vrgather.vv v10, v12, v14, v0.t
; RV64-V128-NEXT: vmv.v.v v8, v10
; RV64-V128-NEXT: ret
@@ -82,8 +84,7 @@
; RV32-V512-NEXT: vid.v v10
; RV32-V512-NEXT: vsrl.vi v11, v10, 1
; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu
-; RV32-V512-NEXT: li a0, 10
-; RV32-V512-NEXT: vmv.s.x v0, a0
+; RV32-V512-NEXT: vmv.v.i v0, 10
; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11
; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t
; RV32-V512-NEXT: vmv.v.v v8, v10
@@ -94,8 +95,7 @@
; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu
; RV64-V512-NEXT: vid.v v10
; RV64-V512-NEXT: vsrl.vi v11, v10, 1
-; RV64-V512-NEXT: li a0, 10
-; RV64-V512-NEXT: vmv.s.x v0, a0
+; RV64-V512-NEXT: vmv.v.i v0, 10
; RV64-V512-NEXT: vrgather.vv v10, v8, v11
; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t
; RV64-V512-NEXT:
vmv.v.v v8, v10 @@ -206,8 +206,7 @@ ; V128-NEXT: vid.v v10 ; V128-NEXT: vsrl.vi v11, v10, 1 ; V128-NEXT: vrgather.vv v10, v8, v11 -; V128-NEXT: li a0, 10 -; V128-NEXT: vmv.s.x v0, a0 +; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vadd.vi v8, v11, 1 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 @@ -219,8 +218,7 @@ ; V512-NEXT: vid.v v10 ; V512-NEXT: vsrl.vi v11, v10, 1 ; V512-NEXT: vrgather.vv v10, v8, v11 -; V512-NEXT: li a0, 10 -; V512-NEXT: vmv.s.x v0, a0 +; V512-NEXT: vmv.v.i v0, 10 ; V512-NEXT: vadd.vi v8, v11, 1 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 @@ -416,56 +414,48 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 -; RV32-V128-NEXT: add a0, sp, a0 -; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV32-V128-NEXT: vle32.v v24, (a0) +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-V128-NEXT: vle32.v v0, (a0) +; RV32-V128-NEXT: vmv8r.v v24, v8 +; RV32-V128-NEXT: addi a0, sp, 16 +; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vrgather.vv v8, v24, v0 ; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV32-V128-NEXT: vle32.v v16, (a0) +; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, 699051 ; RV32-V128-NEXT: addi a0, a0, -1366 -; RV32-V128-NEXT: vmv.s.x v0, a0 -; RV32-V128-NEXT: vrgather.vv v16, v8, v24 -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 -; RV32-V128-NEXT: add a0, sp, a0 -; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-V128-NEXT: vmv.v.x v0, a0 +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-V128-NEXT: vmv.v.v v24, v8 ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 ; 
RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV32-V128-NEXT: vmv8r.v v8, v0 +; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -475,56 +465,48 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 -; RV64-V128-NEXT: add a0, sp, a0 -; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; RV64-V128-NEXT: vle32.v v24, (a0) +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-V128-NEXT: vle32.v v0, (a0) +; RV64-V128-NEXT: vmv8r.v v24, v8 +; RV64-V128-NEXT: addi a0, sp, 16 +; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vrgather.vv v8, v24, v0 ; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV64-V128-NEXT: vle32.v v16, (a0) +; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, 699051 ; RV64-V128-NEXT: addiw a0, a0, -1366 -; RV64-V128-NEXT: vmv.s.x v0, a0 -; RV64-V128-NEXT: vrgather.vv v16, v8, v24 -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 -; RV64-V128-NEXT: add a0, sp, a0 -; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-V128-NEXT: vmv.v.x v0, a0 +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-V128-NEXT: vmv.v.v v24, v8 ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV64-V128-NEXT: vmv8r.v v8, v0 +; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; 
RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -5,9 +5,9 @@ define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-LABEL: shuffle_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 11 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 11 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> @@ -18,8 +18,9 @@ ; CHECK-LABEL: shuffle_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 203 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> @@ -29,9 +30,9 @@ define <4 x i16> @shuffle_xv_v4i16(<4 x i16> %x) { ; CHECK-LABEL: shuffle_xv_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 9 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 9 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vim v8, v8, 5, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> , <4 x i16> %x, <4 x i32> @@ -41,9 +42,9 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) { ; CHECK-LABEL: shuffle_vx_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 6 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vmerge.vim v8, v8, 5, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> , <4 x i32> @@ -85,8 +86,7 @@ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: li a0, 8 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -98,11 +98,10 @@ define <4 x i16> @vrgather_shuffle_xv_v4i16(<4 x i16> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 12 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: vrsub.vi v10, v9, 4 +; CHECK-NEXT: vmv.v.i v0, 12 ; CHECK-NEXT: vmv.v.i v9, 5 ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 @@ -118,7 +117,7 @@ ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a0, 3 ; CHECK-NEXT: vmul.vx v10, v9, a0 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmv.v.i v9, 5 ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 @@ -180,32 +179,36 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, 5 -; RV32-NEXT: vmv.v.i v20, 2 -; RV32-NEXT: vslideup.vi v20, v16, 7 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vle16.v v21, (a0) -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vle16.v v20, (a0) +; RV32-NEXT: vmv.v.i v21, 2 +; RV32-NEXT: vslideup.vi v21, v16, 7 +; RV32-NEXT: vsetvli zero, 
zero, e64, m4, ta, ma +; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: li a0, 164 -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v16, v8, v21 -; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vv_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: li a0, 5 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vmv.s.x v16, a0 ; RV64-NEXT: vmv.v.i v20, 2 ; RV64-NEXT: lui a0, %hi(.LCPI11_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0) ; RV64-NEXT: vle64.v v24, (a0) ; RV64-NEXT: vslideup.vi v20, v16, 7 -; RV64-NEXT: li a0, 164 -; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: li a0, 164 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v0, a0 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vv v16, v12, v20, v0.t ; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: ret @@ -218,27 +221,31 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI12_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 +; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v17, (a0) +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: li a0, 113 -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v20, v16 -; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_xv_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: lui a0, %hi(.LCPI12_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0) +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: li a0, 113 -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v0, a0 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -252,14 +259,16 @@ ; RV32: # %bb.0: ; RV32-NEXT: lui a0, %hi(.LCPI13_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vrgatherei16.vv v12, v8, v16 ; RV32-NEXT: lui a0, %hi(.LCPI13_1) ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a0, 140 -; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV32-NEXT: vmv.v.i v16, 5 ; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v12 @@ -267,12 +276,14 @@ ; ; RV64-LABEL: vrgather_shuffle_vx_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: lui a0, %hi(.LCPI13_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: li a0, 115 -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vsetivli zero, 
1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v0, a0 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vmv.v.i v12, 5 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -377,9 +388,9 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -398,7 +409,7 @@ ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -410,12 +421,12 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i0we4: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 67 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -433,7 +444,7 @@ ; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: li a0, 66 -; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vrgather.vv v10, v8, v11 ; RV32-NEXT: vrgather.vi v10, v9, 0, v0.t ; RV32-NEXT: vmv1r.v v8, v10 @@ -447,7 +458,7 @@ ; RV64-NEXT: vmv.v.x v11, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: li a0, 66 -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vrgather.vv v10, v8, v11 ; RV64-NEXT: vrgather.vi v10, v9, 0, v0.t ; RV64-NEXT: vmv1r.v v8, v10 @@ -464,9 +475,9 @@ ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 -; CHECK-NEXT: li a0, 70 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: li a0, 70 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -489,7 +500,7 @@ ; RV32-NEXT: vmv.v.x v12, a0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: li a0, 98 -; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vrgather.vv v10, v8, v12 ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV32-NEXT: vmv1r.v v8, v10 @@ -508,7 +519,7 @@ ; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: li a0, 98 -; RV64-NEXT: vmv.s.x v0, a0 +; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-NEXT: vmv1r.v v8, v10 @@ -661,7 +672,7 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 224 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -4 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -677,7 +688,7 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 144 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -4 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -724,7 +735,7 
@@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vadd.vi v12, v11, 1 ; CHECK-NEXT: li a0, 195 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -742,7 +753,7 @@ ; CHECK-NEXT: vadd.vi v12, v11, 2 ; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 234 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vadd.vi v8, v11, -1 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -762,7 +773,7 @@ ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) ; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: li a0, 234 -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -229,9 +229,8 @@ ; ; LMULMAX1-RV32-LABEL: splat_v4i64: ; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: li a3, 5 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a3 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vmv.v.x v8, a2 ; LMULMAX1-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 @@ -822,10 +821,12 @@ ; LMULMAX2-RV32-NEXT: vle64.v v12, (a0) ; LMULMAX2-RV32-NEXT: addi a0, a0, 32 ; LMULMAX2-RV32-NEXT: vle64.v v14, (a0) -; LMULMAX2-RV32-NEXT: li a0, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v16, a2 +; LMULMAX2-RV32-NEXT: li a0, 85 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a0 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vadd.vv v14, v14, v16 @@ -859,9 +860,8 @@ ; LMULMAX1-RV32-NEXT: vle64.v v14, (a0) ; LMULMAX1-RV32-NEXT: addi a0, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v15, (a0) -; LMULMAX1-RV32-NEXT: li a0, 5 -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: vmv.v.x v16, a2 ; LMULMAX1-RV32-NEXT: vmerge.vxm v16, v16, a1, v0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1105,45 +1105,45 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: li a1, 513 +; RV32-NEXT: lui a1, 3 +; RV32-NEXT: addi a1, a1, -2044 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 4 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: li a1, -128 +; RV32-NEXT: vmerge.vxm v10, v9, a1, v0 ; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a2, a1, 78 +; RV32-NEXT: addi a2, a1, 32 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 3, v0 -; 
RV32-NEXT: lui a2, 8 -; RV32-NEXT: addi a2, a2, 304 +; RV32-NEXT: lui a2, %hi(.LCPI65_0) +; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0) +; RV32-NEXT: vle8.v v11, (a2) +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsrl.vv v9, v8, v9 +; RV32-NEXT: vmulhu.vv v9, v9, v11 +; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: vmulhu.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: li a2, 513 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 -; RV32-NEXT: lui a2, 3 -; RV32-NEXT: addi a2, a2, -2044 +; RV32-NEXT: vmv.v.i v9, 4 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: addi a1, a1, 78 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a2 +; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: li a2, -128 -; RV32-NEXT: vmerge.vxm v11, v10, a2, v0 -; RV32-NEXT: addi a1, a1, 32 +; RV32-NEXT: vmerge.vim v9, v9, 3, v0 +; RV32-NEXT: lui a1, 8 +; RV32-NEXT: addi a1, a1, 304 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI65_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI65_0) -; RV32-NEXT: vle8.v v12, (a1) -; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsrl.vv v10, v8, v10 -; RV32-NEXT: vmulhu.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: vmulhu.vv v8, v8, v11 -; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: vmerge.vim v9, v9, 2, v0 ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret @@ -1152,45 +1152,45 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: li a1, 513 +; RV64-NEXT: lui a1, 3 +; RV64-NEXT: addiw a1, a1, -2044 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 4 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: li a1, -128 +; RV64-NEXT: vmerge.vxm v10, v9, a1, v0 ; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a2, a1, 78 +; RV64-NEXT: addiw a2, a1, 32 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 3, v0 -; RV64-NEXT: lui a2, 8 -; RV64-NEXT: addiw a2, a2, 304 +; RV64-NEXT: lui a2, %hi(.LCPI65_0) +; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0) +; RV64-NEXT: vle8.v v11, (a2) +; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: vsrl.vv v9, v8, v9 +; RV64-NEXT: vmulhu.vv v9, v9, v11 +; RV64-NEXT: vsub.vv v8, v8, v9 +; RV64-NEXT: vmulhu.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: li a2, 513 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 2, v0 -; RV64-NEXT: lui a2, 3 -; RV64-NEXT: addiw a2, a2, -2044 +; RV64-NEXT: vmv.v.i v9, 4 +; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: addiw a1, a1, 78 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: li a2, -128 -; RV64-NEXT: vmerge.vxm v11, v10, a2, v0 -; RV64-NEXT: addiw a1, a1, 32 +; RV64-NEXT: 
vmerge.vim v9, v9, 3, v0 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: addiw a1, a1, 304 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI65_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI65_0) -; RV64-NEXT: vle8.v v12, (a1) -; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsrl.vv v10, v8, v10 -; RV64-NEXT: vmulhu.vv v10, v10, v12 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vmulhu.vv v8, v8, v11 -; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: vmerge.vim v9, v9, 2, v0 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret @@ -1205,31 +1205,30 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: li a1, 33 +; CHECK-NEXT: vmv.v.x v0, a1 +; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 +; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma +; CHECK-NEXT: vslideup.vi v9, v10, 6 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: lui a1, 1048568 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 1 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.s.x v12, a1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vslideup.vi v11, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) -; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 -; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v10 -; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: li a1, 33 -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vmv.v.i v9, 3 -; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 -; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vsrl.vv v11, v8, v11 +; CHECK-NEXT: vmulhu.vv v10, v11, v10 +; CHECK-NEXT: vsub.vv v8, v8, v10 +; CHECK-NEXT: vmulhu.vv v8, v8, v12 +; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret @@ -1349,18 +1348,18 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: li a1, -123 +; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: lui a1, 5 ; RV32-NEXT: addi a1, a1, -1452 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmv.v.x v0, a1 +; RV32-NEXT: li a1, 57 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 +; RV32-NEXT: vmulhu.vv v8, v8, v9 ; RV32-NEXT: vmv.v.i v9, 7 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: li a1, -123 -; RV32-NEXT: vmv.v.x v10, a1 -; RV32-NEXT: li a1, 57 -; RV32-NEXT: vmerge.vxm v10, v10, a1, v0 -; RV32-NEXT: vmulhu.vv v8, v8, v10 ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret @@ -1369,18 +1368,18 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: li a1, -123 +; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: lui a1, 5 ; RV64-NEXT: addiw a1, a1, -1452 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: 
vmv.v.x v0, a1 +; RV64-NEXT: li a1, 57 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vmerge.vxm v9, v9, a1, v0 +; RV64-NEXT: vmulhu.vv v8, v8, v9 ; RV64-NEXT: vmv.v.i v9, 7 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: li a1, -123 -; RV64-NEXT: vmv.v.x v10, a1 -; RV64-NEXT: li a1, 57 -; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 -; RV64-NEXT: vmulhu.vv v8, v8, v10 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret @@ -1395,11 +1394,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: li a1, 105 -; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 5 ; RV32-NEXT: addi a1, a1, -1755 ; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: li a1, 105 +; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: lui a1, 1048571 ; RV32-NEXT: addi a1, a1, 1755 ; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 @@ -1414,11 +1413,11 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV64-NEXT: vle16.v v8, (a0) -; RV64-NEXT: li a1, 105 -; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: lui a1, 5 ; RV64-NEXT: addiw a1, a1, -1755 ; RV64-NEXT: vmv.v.x v9, a1 +; RV64-NEXT: li a1, 105 +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: lui a1, 1048571 ; RV64-NEXT: addiw a1, a1, 1755 ; RV64-NEXT: vmerge.vxm v9, v9, a1, v0 @@ -1439,25 +1438,24 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vmv.v.i v0, 6 +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v9, -7 +; CHECK-NEXT: vmerge.vim v9, v9, 7, v0 +; CHECK-NEXT: vdiv.vv v9, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 7 -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vmv.v.i v10, 7 +; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: li a1, -14 -; CHECK-NEXT: vmadd.vx v10, a1, v9 +; CHECK-NEXT: vmadd.vx v11, a1, v10 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vdiv.vv v9, v9, v10 -; CHECK-NEXT: li a1, 6 -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, -7 -; CHECK-NEXT: vmerge.vim v10, v10, 7, v0 -; CHECK-NEXT: vdiv.vv v8, v8, v10 +; CHECK-NEXT: vdiv.vv v8, v8, v11 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vslideup.vi v9, v8, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vse16.v v9, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = sdiv <6 x i16> %a, @@ -1470,11 +1468,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: li a1, 5 -; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 419430 ; RV32-NEXT: addi a1, a1, 1639 ; RV32-NEXT: vmv.v.x v9, a1 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: lui a1, 629146 ; RV32-NEXT: addi a1, a1, -1639 ; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 @@ -4955,45 +4952,45 @@ ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV32-NEXT: lui a2, 163907 +; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV32-NEXT: li a2, -128 +; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV32-NEXT: lui a2, 66049 ; LMULMAX2-RV32-NEXT: addi 
a2, a2, 32 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV32-NEXT: vle8.v v10, (a2) -; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV32-NEXT: vmerge.vim v14, v12, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v14, v8, v14 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v14, v10 +; LMULMAX2-RV32-NEXT: vle8.v v14, (a2) +; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, -128 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a2, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV32-NEXT: lui a2, 8208 ; LMULMAX2-RV32-NEXT: addi a2, a2, 513 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV32-NEXT: lui a2, 66785 ; LMULMAX2-RV32-NEXT: addi a2, a2, 78 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 3, v0 ; LMULMAX2-RV32-NEXT: lui a2, 529160 ; LMULMAX2-RV32-NEXT: addi a2, a2, 304 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 2, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 @@ -5005,45 +5002,45 @@ ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0 +; LMULMAX2-RV64-NEXT: lui a2, 163907 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV64-NEXT: li a2, -128 +; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV64-NEXT: lui a2, 66049 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV64-NEXT: vle8.v v10, (a2) -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV64-NEXT: vmerge.vim v14, v12, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v14, v8, v14 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v14, v10 +; LMULMAX2-RV64-NEXT: vle8.v v14, (a2) +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; 
LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, -128 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a2, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV64-NEXT: lui a2, 8208 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV64-NEXT: lui a2, 66785 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 78 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 3, v0 ; LMULMAX2-RV64-NEXT: lui a2, 529160 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 304 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 2, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 @@ -5075,32 +5072,38 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 2 -; LMULMAX2-RV32-NEXT: addi a1, a1, 289 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3 +; LMULMAX2-RV32-NEXT: li a1, 257 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 +; LMULMAX2-RV32-NEXT: lui a1, 1048568 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v12, a1, v0 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 -; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0 -; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX2-RV32-NEXT: li a1, 257 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) ; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) -; LMULMAX2-RV32-NEXT: lui a1, 1048568 -; LMULMAX2-RV32-NEXT: vmerge.vxm v18, v14, a1, v0 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v14, 1, v0 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v12, v10, v12 +; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v12, v16 +; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v12 +; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v12 +; LMULMAX2-RV32-NEXT: lui a1, 2 +; LMULMAX2-RV32-NEXT: addi a1, a1, 289 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0 +; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v12, 1, v0 ; 
LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v16 -; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v18 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -5108,32 +5111,38 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 2 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3 +; LMULMAX2-RV64-NEXT: li a1, 257 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 +; LMULMAX2-RV64-NEXT: lui a1, 1048568 +; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v12, a1, v0 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 -; LMULMAX2-RV64-NEXT: vmv.s.x v8, a1 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0 -; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 -; LMULMAX2-RV64-NEXT: li a1, 257 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) ; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) -; LMULMAX2-RV64-NEXT: lui a1, 1048568 -; LMULMAX2-RV64-NEXT: vmerge.vxm v18, v14, a1, v0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v8, v14, 1, v0 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v12, v10, v12 +; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v12, v16 +; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v12 +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV64-NEXT: vadd.vv v10, v10, v12 +; LMULMAX2-RV64-NEXT: lui a1, 2 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0 +; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 +; LMULMAX2-RV64-NEXT: vmerge.vim v8, v12, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v16 -; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v18 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5163,7 +5172,9 @@ ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: li a1, 68 -; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-NEXT: vmv.v.x v0, a1 +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) ; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) ; LMULMAX2-NEXT: vle32.v v10, (a1) @@ -5175,7 +5186,9 @@ ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 136 -; LMULMAX2-NEXT: vmv.s.x v0, a1 +; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-NEXT: vmv.v.x v0, a1 +; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vmv.v.i v10, 2 ; LMULMAX2-NEXT: 
vmerge.vim v10, v10, 1, v0 ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 @@ -5368,18 +5381,18 @@ ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a2, -123 -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: lui a2, 304453 ; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, 57 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV32-NEXT: li a1, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV32-NEXT: li a1, 57 +; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -5389,18 +5402,18 @@ ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a2, -123 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: lui a2, 304453 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, 57 +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 +; LMULMAX2-RV64-NEXT: li a1, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV64-NEXT: li a1, 57 +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -5414,7 +5427,7 @@ ; LMULMAX1-RV32-NEXT: lui a2, 5 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.i v10, -9 ; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 9, v0 @@ -5433,7 +5446,7 @@ ; LMULMAX1-RV64-NEXT: lui a2, 5 ; LMULMAX1-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2 +; LMULMAX1-RV64-NEXT: vmv.v.x v0, a2 ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmv.v.i v10, -9 ; LMULMAX1-RV64-NEXT: vmerge.vim v10, v10, 9, v0 @@ -5453,14 +5466,16 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 7 -; LMULMAX2-RV32-NEXT: addi a1, a1, -1687 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 5 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1755 ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: lui a1, 7 +; LMULMAX2-RV32-NEXT: addi a1, a1, -1687 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 1048571 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1755 +; LMULMAX2-RV32-NEXT: 
vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1 @@ -5473,14 +5488,16 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 7 -; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV64-NEXT: lui a1, 5 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755 ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV64-NEXT: lui a1, 7 +; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV64-NEXT: lui a1, 1048571 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1 @@ -5496,7 +5513,7 @@ ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle16.v v9, (a1) ; LMULMAX1-NEXT: li a2, 105 -; LMULMAX1-NEXT: vmv.s.x v0, a2 +; LMULMAX1-NEXT: vmv.v.x v0, a2 ; LMULMAX1-NEXT: vmv.v.i v10, 7 ; LMULMAX1-NEXT: vmerge.vim v10, v10, -7, v0 ; LMULMAX1-NEXT: vdiv.vv v9, v9, v10 @@ -5515,13 +5532,15 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 85 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 419430 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1639 ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: li a1, 85 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 629146 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1639 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 31 @@ -5552,11 +5571,10 @@ ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV32-NEXT: li a2, 5 -; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX1-RV32-NEXT: lui a2, 419430 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1639 ; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX1-RV32-NEXT: vmv.v.i v0, 5 ; LMULMAX1-RV32-NEXT: lui a2, 629146 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1639 ; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 @@ -5600,18 +5618,21 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32-NEXT: li a1, 17 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a2, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX2-RV32-NEXT: li a2, 17 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1366 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmulh.vv v10, v8, v10 ; LMULMAX2-RV32-NEXT: li a1, 51 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV32-NEXT: vmerge.vim 
v12, v12, 0, v0 @@ -5620,7 +5641,8 @@ ; LMULMAX2-RV32-NEXT: li a1, 63 ; LMULMAX2-RV32-NEXT: vsrl.vx v8, v12, a1 ; LMULMAX2-RV32-NEXT: li a1, 68 -; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 @@ -5634,26 +5656,27 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, 5 -; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5 +; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV64-NEXT: lui a1, 349525 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 +; LMULMAX2-RV64-NEXT: slli a2, a1, 32 +; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_0) ; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) -; LMULMAX2-RV64-NEXT: slli a3, a1, 32 -; LMULMAX2-RV64-NEXT: add a1, a1, a3 -; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 -; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v10, v8, v10 ; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV64-NEXT: vmadd.vv v12, v8, v10 +; LMULMAX2-RV64-NEXT: vmv.v.x v14, a1 +; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v14, a2, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v14, v8, v14 +; LMULMAX2-RV64-NEXT: vmacc.vv v14, v8, v12 +; LMULMAX2-RV64-NEXT: vsra.vv v8, v14, v10 ; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v8, v12, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 -; LMULMAX2-RV64-NEXT: vsra.vv v10, v12, v10 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 +; LMULMAX2-RV64-NEXT: vsrl.vx v10, v14, a1 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -16,10 +16,10 @@ ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vadd.vv v9, v8, v8 ; CHECK-NEXT: vrgather.vv v8, v10, v9 -; CHECK-NEXT: li a0, 4 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v12, v10, 4 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vrgather.vi v8, v12, 0, v0.t ; CHECK-NEXT: vadd.vi v11, v9, 1 @@ -132,16 +132,16 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 82 +; RV32-NEXT: li a3, 78 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xce, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 78 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 57 +; RV32-NEXT: li a4, 53 ; 
RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 @@ -156,150 +156,122 @@ ; RV32-NEXT: vs4r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vadd.vi v8, v24, -4 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 4 -; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: li a4, 13 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs4r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vrgather.vv v12, v16, v8 +; RV32-NEXT: vrgather.vv v4, v16, v8 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 45 +; RV32-NEXT: li a4, 41 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vadd.vi v8, v24, -10 ; RV32-NEXT: lui a3, 12 -; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 5 -; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: li a4, 29 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a4, a3, 6 -; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: li a4, 69 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t +; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t +; RV32-NEXT: lui a3, %hi(.LCPI6_0) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_0) +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vle32.v v24, (a3) +; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 41 +; RV32-NEXT: li a4, 61 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs4r.v v12, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a1, 128 -; RV32-NEXT: lui a4, %hi(.LCPI6_0) -; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a5, %hi(.LCPI6_1) -; RV32-NEXT: addi a5, a5, %lo(.LCPI6_1) -; RV32-NEXT: lui a6, 1 -; RV32-NEXT: vle32.v v8, (a4) -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a7, a4, 3 -; RV32-NEXT: add a4, a7, a4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, a1, 128 +; RV32-NEXT: vrgather.vv v8, v16, v24 +; RV32-NEXT: lui a3, %hi(.LCPI6_1) +; RV32-NEXT: addi a3, a3, %lo(.LCPI6_1) +; RV32-NEXT: lui a4, 1 +; RV32-NEXT: addi a4, a4, -64 +; RV32-NEXT: vle32.v v16, (a3) +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 21 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 73 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v8, (a5) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 25 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded 
Spill -; RV32-NEXT: vle32.v v8, (a3) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, a6, -64 -; RV32-NEXT: vmv.s.x v24, a1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 +; RV32-NEXT: slli a3, a1, 4 ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v16, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v2, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v8, v24 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 41 +; RV32-NEXT: li a3, 21 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vi v16, v12, -2 +; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vi v8, v24, -2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16 -; RV32-NEXT: vadd.vi v16, v12, -8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v4, v16, v8 +; RV32-NEXT: vadd.vi v8, v24, -8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 29 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 69 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t -; 
RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu @@ -308,37 +280,42 @@ ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 61 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v8, v16 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v24, v8, v16, v0.t +; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v4, v24 +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 25 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 4 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill @@ -347,12 +324,12 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v16, v8 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v0, v8 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a1, a1, a3 @@ -361,104 +338,87 @@ ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vi v8, v8, -6 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 -; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 29 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t +; RV32-NEXT: vl1r.v v1, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 +; RV32-NEXT: li a3, 69 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add 
a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t +; RV32-NEXT: vmv.v.v v4, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a3, %hi(.LCPI6_6) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a1) -; RV32-NEXT: vle32.v v8, (a3) -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 61 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v24, v16 +; RV32-NEXT: lui a1, %hi(.LCPI6_6) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_6) +; RV32-NEXT: li a3, 960 +; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv.v.v v12, v8 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 37 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a3, a1, 2 +; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v12, v24, v8 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 69 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: 
vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v12, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill @@ -467,132 +427,130 @@ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_9) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9) -; RV32-NEXT: vle32.v v24, (a1) +; RV32-NEXT: vle32.v v0, (a1) ; RV32-NEXT: vle32.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 29 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 61 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v0, v24 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v0 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 4 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 29 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v12, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 5 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 37 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: lui a1, %hi(.LCPI6_10) -; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) -; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: lui a1, 15 -; RV32-NEXT: vmv.s.x v1, a1 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 3 -; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v4, v16, v12 -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vrgather.vv v4, v8, v12 +; RV32-NEXT: lui a1, %hi(.LCPI6_10) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) +; RV32-NEXT: lui a3, 15 +; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v2, a3 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 69 +; RV32-NEXT: mul a1, 
a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vrgather.vv v4, v16, v8, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu -; RV32-NEXT: lui a3, %hi(.LCPI6_12) -; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: vle32.v v8, (a3) +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 61 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 1008 -; RV32-NEXT: vmv.s.x v2, a1 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16 +; RV32-NEXT: lui a1, %hi(.LCPI6_12) +; RV32-NEXT: addi a1, a1, %lo(.LCPI6_12) +; RV32-NEXT: li a3, 1008 +; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 73 +; RV32-NEXT: li a3, 53 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 49 +; RV32-NEXT: li a3, 45 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 57 +; RV32-NEXT: li a3, 29 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 69 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a3, a1, 6 -; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: li a3, 41 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v12, v16, v8, v0.t +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v12, v24, v8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 45 +; RV32-NEXT: li a3, 41 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -602,38 +560,43 @@ ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a2, %hi(.LCPI6_15) ; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15) -; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: 
vle32.v v24, (a1) ; RV32-NEXT: vle32.v v8, (a2) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 6 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 69 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 73 +; RV32-NEXT: li a2, 61 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v24, v16 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v0, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 49 +; RV32-NEXT: li a2, 53 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a2, a1, 6 -; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 69 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 45 +; RV32-NEXT: li a2, 41 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -643,16 +606,14 @@ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vse32.v v12, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v4, (a1) -; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a3, a2, 5 -; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: li a3, 29 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) -; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 37 ; RV32-NEXT: mul a2, a2, a3 @@ -660,23 +621,31 @@ ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 25 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 41 +; RV32-NEXT: li a2, 21 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 82 +; RV32-NEXT: li a1, 78 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 @@ -691,72 +660,81 @@ ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 ; 
RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd6, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 86 * vlenb -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: vle64.v v16, (a2) +; RV64-NEXT: addi a2, a1, 256 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 61 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, a1, 128 +; RV64-NEXT: vle64.v v24, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 +; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vid.v v8 -; RV64-NEXT: li a2, 6 -; RV64-NEXT: vmul.vx v8, v8, a2 -; RV64-NEXT: vrgather.vv v24, v0, v8 +; RV64-NEXT: li a1, 6 +; RV64-NEXT: vmul.vx v8, v8, a1 +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vrgather.vv v16, v0, v8 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 69 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 41 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vadd.vi v8, v8, -16 -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t -; RV64-NEXT: addi a1, a1, 256 -; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 128 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 77 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 4 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v16, 4 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v16, 8 +; RV64-NEXT: vslidedown.vi v24, v24, 8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size 
Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t +; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 @@ -765,20 +743,20 @@ ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v8, v16, 1 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v24, 1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v0, v8 -; RV64-NEXT: vadd.vi v8, v16, -15 +; RV64-NEXT: vrgather.vv v16, v0, v8 +; RV64-NEXT: vadd.vi v8, v24, -15 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 @@ -786,20 +764,20 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 5 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 5 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 @@ -811,10 +789,10 @@ ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 3, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 @@ -832,54 +810,68 @@ ; RV64-NEXT: vmv.s.x v12, zero ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 13 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vslideup.vi v8, v12, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v12, v24, v8 -; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v12, v16, v8 +; 
RV64-NEXT: vrgather.vi v12, v24, 4, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v0, v8, 2 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v0, 2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v0 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: li a1, 24 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vadd.vi v8, v0, -14 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v8, -14 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 41 @@ -887,7 +879,7 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 @@ -898,28 +890,28 @@ ; RV64-NEXT: li a1, 1 ; RV64-NEXT: vmv.v.i v8, 7 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 21 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs4r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv.s.x v12, a1 ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vslideup.vi v8, v12, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v20, v16, 
v8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 @@ -932,64 +924,64 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v8, 5, v0.t +; RV64-NEXT: vrgather.vi v20, v8, 5, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v0, 3 +; RV64-NEXT: vadd.vi v24, v0, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v0, -13 +; RV64-NEXT: vadd.vi v24, v0, -13 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 13 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload @@ -1000,20 +992,24 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vslideup.vi v16, v8, 6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 192 -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 77 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: 
vrgather.vi v20, v8, 2 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 2 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 @@ -1027,82 +1023,76 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v0, 4 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v24, v0, 4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 13 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24 ; RV64-NEXT: li a1, 28 -; RV64-NEXT: vmv.s.x v16, a1 -; RV64-NEXT: vadd.vi v8, v0, -12 -; RV64-NEXT: vmv1r.v v0, v16 -; RV64-NEXT: vmv1r.v v1, v16 +; RV64-NEXT: vadd.vi v24, v0, -12 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 -; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 13 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 5 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 5 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # 
Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 21 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vslideup.vi v16, v8, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v4, v8, 3 +; RV64-NEXT: vrgather.vi v20, v8, 3 ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload @@ -1112,42 +1102,81 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v4, v8, v16, v0.t +; RV64-NEXT: vrgather.vv v20, v8, v16, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v8, 5 +; RV64-NEXT: vadd.vi v24, v8, 5 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v0, v24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16 +; RV64-NEXT: vadd.vi v8, v24, -11 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v16, v16, -11 -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 77 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 45 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma 
-; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v4, (a1) +; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a3, a2, 5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -100,16 +100,14 @@ define <3 x i1> @buildvec_mask_v1i1() { ; CHECK-LABEL: buildvec_mask_v1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v1i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a0, 2 ; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: vmv.v.i v0, 2 ; ZVE32F-NEXT: ret ret <3 x i1> } @@ -117,16 +115,14 @@ define <3 x i1> @buildvec_mask_optsize_v1i1() optsize { ; CHECK-LABEL: buildvec_mask_optsize_v1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_v1i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a0, 2 ; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: vmv.v.i v0, 2 ; ZVE32F-NEXT: ret ret <3 x i1> } @@ -134,16 +130,14 @@ define <4 x i1> @buildvec_mask_v4i1() { ; CHECK-LABEL: buildvec_mask_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 6 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v4i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a0, 6 ; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: vmv.v.i v0, 6 ; ZVE32F-NEXT: ret ret <4 x i1> } @@ -151,9 +145,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 3 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -162,9 +155,8 @@ ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a2, 3 ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a2 +; ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmv.v.x v8, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 @@ -243,14 +235,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 182 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v8i1: ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: li a0, 182 ; ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: vmv.v.x v0, a0 ; ZVE32F-NEXT: ret ret <8 x i1> } @@ -258,9 +250,9 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 19 ; 
CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: vmv.v.x v0, a2 ; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 @@ -269,9 +261,9 @@ ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a2, 19 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a2 +; ZVE32F-NEXT: li a2, 19 +; ZVE32F-NEXT: vmv.v.x v0, a2 ; ZVE32F-NEXT: vmv.v.x v8, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 @@ -420,14 +412,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 949 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v10i1: ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: li a0, 949 ; ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: vmv.v.x v0, a0 ; ZVE32F-NEXT: ret ret <10 x i1> } @@ -438,7 +430,7 @@ ; CHECK-RV32-NEXT: lui a0, 11 ; CHECK-RV32-NEXT: addi a0, a0, 1718 ; CHECK-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-RV32-NEXT: vmv.s.x v0, a0 +; CHECK-RV32-NEXT: vmv.v.x v0, a0 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: buildvec_mask_v16i1: @@ -446,7 +438,7 @@ ; CHECK-RV64-NEXT: lui a0, 11 ; CHECK-RV64-NEXT: addiw a0, a0, 1718 ; CHECK-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-RV64-NEXT: vmv.s.x v0, a0 +; CHECK-RV64-NEXT: vmv.v.x v0, a0 ; CHECK-RV64-NEXT: ret ret <16 x i1> } @@ -456,14 +448,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 1722 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v16i1_undefs: ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: li a0, 1722 ; ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a0 +; ZVE32F-NEXT: vmv.v.x v0, a0 ; ZVE32F-NEXT: ret ret <16 x i1> } @@ -473,20 +465,20 @@ ; RV32-LMULMAX1: # %bb.0: ; RV32-LMULMAX1-NEXT: li a0, 1776 ; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX1-NEXT: lui a0, 11 ; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v8, a0 ; RV32-LMULMAX1-NEXT: ret ; ; RV64-LMULMAX1-LABEL: buildvec_mask_v32i1: ; RV64-LMULMAX1: # %bb.0: ; RV64-LMULMAX1-NEXT: li a0, 1776 ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 ; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v8, a0 ; RV64-LMULMAX1-NEXT: ret ; ; RV32-LMULMAX2-LABEL: buildvec_mask_v32i1: @@ -494,7 +486,7 @@ ; RV32-LMULMAX2-NEXT: lui a0, 748384 ; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX2-NEXT: ret ; ; RV64-LMULMAX2-LABEL: buildvec_mask_v32i1: @@ -502,7 +494,7 @@ ; RV64-LMULMAX2-NEXT: lui a0, 748384 ; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX2-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX2-NEXT: ret ; ; RV32-LMULMAX4-LABEL: buildvec_mask_v32i1: @@ -510,7 +502,7 @@ ; RV32-LMULMAX4-NEXT: lui a0, 748384 ; RV32-LMULMAX4-NEXT: addi a0, a0, 1776 ; RV32-LMULMAX4-NEXT: vsetivli 
zero, 1, e32, mf2, ta, ma -; RV32-LMULMAX4-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX4-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX4-NEXT: ret ; ; RV64-LMULMAX4-LABEL: buildvec_mask_v32i1: @@ -518,7 +510,7 @@ ; RV64-LMULMAX4-NEXT: lui a0, 748384 ; RV64-LMULMAX4-NEXT: addiw a0, a0, 1776 ; RV64-LMULMAX4-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-LMULMAX4-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX4-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX4-NEXT: ret ; ; RV32-LMULMAX8-LABEL: buildvec_mask_v32i1: @@ -526,7 +518,7 @@ ; RV32-LMULMAX8-NEXT: lui a0, 748384 ; RV32-LMULMAX8-NEXT: addi a0, a0, 1776 ; RV32-LMULMAX8-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-LMULMAX8-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX8-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX8-NEXT: ret ; ; RV64-LMULMAX8-LABEL: buildvec_mask_v32i1: @@ -534,7 +526,7 @@ ; RV64-LMULMAX8-NEXT: lui a0, 748384 ; RV64-LMULMAX8-NEXT: addiw a0, a0, 1776 ; RV64-LMULMAX8-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX8-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX8-NEXT: ret ret <32 x i1> } @@ -544,13 +536,13 @@ ; RV32-LMULMAX1: # %bb.0: ; RV32-LMULMAX1-NEXT: li a0, 1776 ; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX1-NEXT: lui a0, 4 ; RV32-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX1-NEXT: vmv.s.x v9, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v9, a0 ; RV32-LMULMAX1-NEXT: lui a0, 11 ; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v8, a0 ; RV32-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV32-LMULMAX1-NEXT: ret ; @@ -558,13 +550,13 @@ ; RV64-LMULMAX1: # %bb.0: ; RV64-LMULMAX1-NEXT: li a0, 1776 ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 4 ; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793 -; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v9, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 ; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v8, a0 ; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV64-LMULMAX1-NEXT: ret ; @@ -573,10 +565,10 @@ ; RV32-LMULMAX2-NEXT: lui a0, 748384 ; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX2-NEXT: lui a0, 748388 ; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v8, a0 ; RV32-LMULMAX2-NEXT: ret ; ; RV64-LMULMAX2-LABEL: buildvec_mask_v64i1: @@ -584,10 +576,10 @@ ; RV64-LMULMAX2-NEXT: lui a0, 748384 ; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX2-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX2-NEXT: lui a0, 748388 ; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793 -; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0 +; RV64-LMULMAX2-NEXT: vmv.v.x v8, a0 ; RV64-LMULMAX2-NEXT: ret ; ; RV32-LMULMAX4-LABEL: buildvec_mask_v64i1: @@ -595,11 +587,11 @@ ; RV32-LMULMAX4-NEXT: lui a0, 748388 ; RV32-LMULMAX4-NEXT: addi a0, a0, -1793 ; RV32-LMULMAX4-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-LMULMAX4-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX4-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX4-NEXT: lui a0, 748384 ; RV32-LMULMAX4-NEXT: addi a0, a0, 1776 +; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; RV32-LMULMAX4-NEXT: vmv.s.x v0, a0 -; 
RV32-LMULMAX4-NEXT: vslideup.vi v0, v8, 1 ; RV32-LMULMAX4-NEXT: ret ; ; RV64-LMULMAX4-LABEL: buildvec_mask_v64i1: @@ -615,11 +607,11 @@ ; RV32-LMULMAX8-NEXT: lui a0, 748388 ; RV32-LMULMAX8-NEXT: addi a0, a0, -1793 ; RV32-LMULMAX8-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-LMULMAX8-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX8-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX8-NEXT: lui a0, 748384 ; RV32-LMULMAX8-NEXT: addi a0, a0, 1776 +; RV32-LMULMAX8-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; RV32-LMULMAX8-NEXT: vmv.s.x v0, a0 -; RV32-LMULMAX8-NEXT: vslideup.vi v0, v8, 1 ; RV32-LMULMAX8-NEXT: ret ; ; RV64-LMULMAX8-LABEL: buildvec_mask_v64i1: @@ -637,19 +629,19 @@ ; RV32-LMULMAX1: # %bb.0: ; RV32-LMULMAX1-NEXT: li a0, 1776 ; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX1-NEXT: lui a0, 11 ; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v8, a0 ; RV32-LMULMAX1-NEXT: lui a0, 8 ; RV32-LMULMAX1-NEXT: addi a0, a0, 1718 -; RV32-LMULMAX1-NEXT: vmv.s.x v12, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v12, a0 ; RV32-LMULMAX1-NEXT: lui a0, 4 ; RV32-LMULMAX1-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX1-NEXT: vmv.s.x v9, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v9, a0 ; RV32-LMULMAX1-NEXT: lui a0, 14 ; RV32-LMULMAX1-NEXT: addi a0, a0, 1722 -; RV32-LMULMAX1-NEXT: vmv.s.x v14, a0 +; RV32-LMULMAX1-NEXT: vmv.v.x v14, a0 ; RV32-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV32-LMULMAX1-NEXT: vmv1r.v v11, v0 ; RV32-LMULMAX1-NEXT: vmv1r.v v13, v9 @@ -659,19 +651,19 @@ ; RV64-LMULMAX1: # %bb.0: ; RV64-LMULMAX1-NEXT: li a0, 1776 ; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX1-NEXT: lui a0, 11 ; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v8, a0 ; RV64-LMULMAX1-NEXT: lui a0, 8 ; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718 -; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v12, a0 ; RV64-LMULMAX1-NEXT: lui a0, 4 ; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793 -; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v9, a0 ; RV64-LMULMAX1-NEXT: lui a0, 14 ; RV64-LMULMAX1-NEXT: addiw a0, a0, 1722 -; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0 +; RV64-LMULMAX1-NEXT: vmv.v.x v14, a0 ; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8 ; RV64-LMULMAX1-NEXT: vmv1r.v v11, v0 ; RV64-LMULMAX1-NEXT: vmv1r.v v13, v9 @@ -682,16 +674,16 @@ ; RV32-LMULMAX2-NEXT: lui a0, 748384 ; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 ; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v0, a0 ; RV32-LMULMAX2-NEXT: lui a0, 748388 ; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v8, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v8, a0 ; RV32-LMULMAX2-NEXT: lui a0, 551776 ; RV32-LMULMAX2-NEXT: addi a0, a0, 1776 -; RV32-LMULMAX2-NEXT: vmv.s.x v9, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v9, a0 ; RV32-LMULMAX2-NEXT: lui a0, 945060 ; RV32-LMULMAX2-NEXT: addi a0, a0, -1793 -; RV32-LMULMAX2-NEXT: vmv.s.x v10, a0 +; RV32-LMULMAX2-NEXT: vmv.v.x v10, a0 ; RV32-LMULMAX2-NEXT: ret ; ; RV64-LMULMAX2-LABEL: buildvec_mask_v128i1: @@ -699,16 +691,16 @@ ; RV64-LMULMAX2-NEXT: lui a0, 748384 ; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776 ; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0 +; RV64-LMULMAX2-NEXT: vmv.v.x v0, a0 ; RV64-LMULMAX2-NEXT: lui a0, 748388 ; 
RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
-; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v8, a0
; RV64-LMULMAX2-NEXT: lui a0, 551776
; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
-; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v9, a0
; RV64-LMULMAX2-NEXT: lui a0, 945060
; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
-; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v10, a0
; RV64-LMULMAX2-NEXT: ret
;
; RV32-LMULMAX4-LABEL: buildvec_mask_v128i1:
@@ -716,18 +708,19 @@
; RV32-LMULMAX4-NEXT: lui a0, 748388
; RV32-LMULMAX4-NEXT: addi a0, a0, -1793
; RV32-LMULMAX4-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-LMULMAX4-NEXT: vmv.s.x v8, a0
+; RV32-LMULMAX4-NEXT: vmv.v.x v0, a0
; RV32-LMULMAX4-NEXT: lui a0, 748384
; RV32-LMULMAX4-NEXT: addi a0, a0, 1776
+; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; RV32-LMULMAX4-NEXT: vmv.s.x v0, a0
-; RV32-LMULMAX4-NEXT: vslideup.vi v0, v8, 1
; RV32-LMULMAX4-NEXT: lui a0, 945060
; RV32-LMULMAX4-NEXT: addi a0, a0, -1793
-; RV32-LMULMAX4-NEXT: vmv.s.x v9, a0
+; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; RV32-LMULMAX4-NEXT: vmv.v.x v8, a0
; RV32-LMULMAX4-NEXT: lui a0, 551776
; RV32-LMULMAX4-NEXT: addi a0, a0, 1776
+; RV32-LMULMAX4-NEXT: vsetvli zero, zero, e32, mf2, tu, ma
; RV32-LMULMAX4-NEXT: vmv.s.x v8, a0
-; RV32-LMULMAX4-NEXT: vslideup.vi v8, v9, 1
; RV32-LMULMAX4-NEXT: ret
;
; RV64-LMULMAX4-LABEL: buildvec_mask_v128i1:
@@ -743,37 +736,31 @@
;
; RV32-LMULMAX8-LABEL: buildvec_mask_v128i1:
; RV32-LMULMAX8: # %bb.0:
-; RV32-LMULMAX8-NEXT: lui a0, 748388
-; RV32-LMULMAX8-NEXT: addi a0, a0, -1793
-; RV32-LMULMAX8-NEXT: vsetivli zero, 2, e32, m1, tu, ma
-; RV32-LMULMAX8-NEXT: vmv.s.x v8, a0
-; RV32-LMULMAX8-NEXT: lui a0, 748384
-; RV32-LMULMAX8-NEXT: addi a0, a0, 1776
-; RV32-LMULMAX8-NEXT: vmv.s.x v0, a0
-; RV32-LMULMAX8-NEXT: vslideup.vi v0, v8, 1
-; RV32-LMULMAX8-NEXT: lui a0, 551776
-; RV32-LMULMAX8-NEXT: addi a0, a0, 1776
-; RV32-LMULMAX8-NEXT: vmv.s.x v8, a0
-; RV32-LMULMAX8-NEXT: vsetivli zero, 3, e32, m1, tu, ma
-; RV32-LMULMAX8-NEXT: vslideup.vi v0, v8, 2
-; RV32-LMULMAX8-NEXT: lui a0, 945060
-; RV32-LMULMAX8-NEXT: addi a0, a0, -1793
-; RV32-LMULMAX8-NEXT: vmv.s.x v8, a0
+; RV32-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_0)
+; RV32-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI20_0)
; RV32-LMULMAX8-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-LMULMAX8-NEXT: vslideup.vi v0, v8, 3
+; RV32-LMULMAX8-NEXT: vle32.v v0, (a0)
; RV32-LMULMAX8-NEXT: ret
;
; RV64-LMULMAX8-LABEL: buildvec_mask_v128i1:
; RV64-LMULMAX8: # %bb.0:
; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_0)
-; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI20_0)(a0)
-; RV64-LMULMAX8-NEXT: lui a1, %hi(.LCPI20_1)
-; RV64-LMULMAX8-NEXT: ld a1, %lo(.LCPI20_1)(a1)
+; RV64-LMULMAX8-NEXT: addi a0, a0, %lo(.LCPI20_0)
; RV64-LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-LMULMAX8-NEXT: vmv.s.x v8, a0
-; RV64-LMULMAX8-NEXT: vmv.s.x v0, a1
-; RV64-LMULMAX8-NEXT: vslideup.vi v0, v8, 1
+; RV64-LMULMAX8-NEXT: vlse64.v v0, (a0), zero
+; RV64-LMULMAX8-NEXT: lui a0, %hi(.LCPI20_1)
+; RV64-LMULMAX8-NEXT: ld a0, %lo(.LCPI20_1)(a0)
+; RV64-LMULMAX8-NEXT: vsetvli zero, zero, e64, m1, tu, ma
+; RV64-LMULMAX8-NEXT: vmv.s.x v0, a0
; RV64-LMULMAX8-NEXT: ret
+;
+; ZVE32F-LABEL: buildvec_mask_v128i1:
+; ZVE32F: # %bb.0:
+; ZVE32F-NEXT: lui a0, %hi(.LCPI20_0)
+; ZVE32F-NEXT: addi a0, a0, %lo(.LCPI20_0)
+; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZVE32F-NEXT: vle32.v v0, (a0)
+; ZVE32F-NEXT: ret
ret <128 x i1>
}
@@ -782,19 +769,19 @@
; RV32-LMULMAX1: # %bb.0:
; RV32-LMULMAX1-NEXT: li a0, 1776
; RV32-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-LMULMAX1-NEXT: vmv.s.x v0, a0
+; RV32-LMULMAX1-NEXT: vmv.v.x v0, a0
; RV32-LMULMAX1-NEXT: lui a0, 11
; RV32-LMULMAX1-NEXT: addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT: vmv.s.x v8, a0
+; RV32-LMULMAX1-NEXT: vmv.v.x v8, a0
; RV32-LMULMAX1-NEXT: lui a0, 8
; RV32-LMULMAX1-NEXT: addi a0, a0, 1718
-; RV32-LMULMAX1-NEXT: vmv.s.x v12, a0
+; RV32-LMULMAX1-NEXT: vmv.v.x v12, a0
; RV32-LMULMAX1-NEXT: lui a0, 4
; RV32-LMULMAX1-NEXT: addi a0, a0, -1793
-; RV32-LMULMAX1-NEXT: vmv.s.x v9, a0
+; RV32-LMULMAX1-NEXT: vmv.v.x v9, a0
; RV32-LMULMAX1-NEXT: lui a0, 14
; RV32-LMULMAX1-NEXT: addi a0, a0, 1722
-; RV32-LMULMAX1-NEXT: vmv.s.x v14, a0
+; RV32-LMULMAX1-NEXT: vmv.v.x v14, a0
; RV32-LMULMAX1-NEXT: vmv1r.v v10, v8
; RV32-LMULMAX1-NEXT: vmv1r.v v11, v0
; RV32-LMULMAX1-NEXT: vmv1r.v v13, v9
@@ -804,19 +791,19 @@
; RV64-LMULMAX1: # %bb.0:
; RV64-LMULMAX1-NEXT: li a0, 1776
; RV64-LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-LMULMAX1-NEXT: vmv.s.x v0, a0
+; RV64-LMULMAX1-NEXT: vmv.v.x v0, a0
; RV64-LMULMAX1-NEXT: lui a0, 11
; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
-; RV64-LMULMAX1-NEXT: vmv.s.x v8, a0
+; RV64-LMULMAX1-NEXT: vmv.v.x v8, a0
; RV64-LMULMAX1-NEXT: lui a0, 8
; RV64-LMULMAX1-NEXT: addiw a0, a0, 1718
-; RV64-LMULMAX1-NEXT: vmv.s.x v12, a0
+; RV64-LMULMAX1-NEXT: vmv.v.x v12, a0
; RV64-LMULMAX1-NEXT: lui a0, 4
; RV64-LMULMAX1-NEXT: addiw a0, a0, -1793
-; RV64-LMULMAX1-NEXT: vmv.s.x v9, a0
+; RV64-LMULMAX1-NEXT: vmv.v.x v9, a0
; RV64-LMULMAX1-NEXT: lui a0, 14
; RV64-LMULMAX1-NEXT: addiw a0, a0, 1722
-; RV64-LMULMAX1-NEXT: vmv.s.x v14, a0
+; RV64-LMULMAX1-NEXT: vmv.v.x v14, a0
; RV64-LMULMAX1-NEXT: vmv1r.v v10, v8
; RV64-LMULMAX1-NEXT: vmv1r.v v11, v0
; RV64-LMULMAX1-NEXT: vmv1r.v v13, v9
@@ -827,16 +814,16 @@
; RV32-LMULMAX2-NEXT: lui a0, 748384
; RV32-LMULMAX2-NEXT: addi a0, a0, 1776
; RV32-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-LMULMAX2-NEXT: vmv.s.x v0, a0
+; RV32-LMULMAX2-NEXT: vmv.v.x v0, a0
; RV32-LMULMAX2-NEXT: lui a0, 748388
; RV32-LMULMAX2-NEXT: addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT: vmv.s.x v8, a0
+; RV32-LMULMAX2-NEXT: vmv.v.x v8, a0
; RV32-LMULMAX2-NEXT: lui a0, 551776
; RV32-LMULMAX2-NEXT: addi a0, a0, 1776
-; RV32-LMULMAX2-NEXT: vmv.s.x v9, a0
+; RV32-LMULMAX2-NEXT: vmv.v.x v9, a0
; RV32-LMULMAX2-NEXT: lui a0, 945060
; RV32-LMULMAX2-NEXT: addi a0, a0, -1793
-; RV32-LMULMAX2-NEXT: vmv.s.x v10, a0
+; RV32-LMULMAX2-NEXT: vmv.v.x v10, a0
; RV32-LMULMAX2-NEXT: ret
;
; RV64-LMULMAX2-LABEL: buildvec_mask_optsize_v128i1:
@@ -844,16 +831,16 @@
; RV64-LMULMAX2-NEXT: lui a0, 748384
; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
; RV64-LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-LMULMAX2-NEXT: vmv.s.x v0, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v0, a0
; RV64-LMULMAX2-NEXT: lui a0, 748388
; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
-; RV64-LMULMAX2-NEXT: vmv.s.x v8, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v8, a0
; RV64-LMULMAX2-NEXT: lui a0, 551776
; RV64-LMULMAX2-NEXT: addiw a0, a0, 1776
-; RV64-LMULMAX2-NEXT: vmv.s.x v9, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v9, a0
; RV64-LMULMAX2-NEXT: lui a0, 945060
; RV64-LMULMAX2-NEXT: addiw a0, a0, -1793
-; RV64-LMULMAX2-NEXT: vmv.s.x v10, a0
+; RV64-LMULMAX2-NEXT: vmv.v.x v10, a0
; RV64-LMULMAX2-NEXT: ret
;
; RV32-LMULMAX4-LABEL: buildvec_mask_optsize_v128i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
@@ -181,8 +181,7 @@
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; ZVE32F-NEXT: li a0, 2
-; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: vmv.v.i v0, 2
; ZVE32F-NEXT: vrgather.vi v10, v8, 1
; ZVE32F-NEXT: vrgather.vi v10, v9, 1, v0.t
; ZVE32F-NEXT: vse32.v v10, (a1)
@@ -237,8 +236,7 @@
; ZVE32F-NEXT: vle32.v v8, (a0)
; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2
-; ZVE32F-NEXT: li a0, 2
-; ZVE32F-NEXT: vmv.s.x v0, a0
+; ZVE32F-NEXT: vmv.v.i v0, 2
; ZVE32F-NEXT: vrgather.vi v10, v8, 1
; ZVE32F-NEXT: vrgather.vi v10, v9, 1, v0.t
; ZVE32F-NEXT: vse32.v v10, (a1)
@@ -282,8 +280,7 @@
; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; V-NEXT: vslidedown.vi v9, v8, 2
-; V-NEXT: li a0, 2
-; V-NEXT: vmv.s.x v0, a0
+; V-NEXT: vmv.v.i v0, 2
; V-NEXT: vrgather.vi v10, v8, 1
; V-NEXT: vrgather.vi v10, v9, 1, v0.t
; V-NEXT: vse64.v v10, (a1)
@@ -335,8 +332,7 @@
; V-NEXT: vle64.v v8, (a0)
; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; V-NEXT: vslidedown.vi v9, v8, 2
-; V-NEXT: li a0, 2
-; V-NEXT: vmv.s.x v0, a0
+; V-NEXT: vmv.v.i v0, 2
; V-NEXT: vrgather.vi v10, v8, 1
; V-NEXT: vrgather.vi v10, v9, 1, v0.t
; V-NEXT: vse64.v v10, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
@@ -1296,10 +1296,12 @@
; RV32-NEXT: vmv1r.v v16, v0
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV32-NEXT: lui a3, 341
-; RV32-NEXT: addi a3, a3, 1365
-; RV32-NEXT: vmv.s.x v0, a3
; RV32-NEXT: vmv.v.x v24, a1
+; RV32-NEXT: lui a1, 341
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v0, a1
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vmerge.vxm v24, v24, a0, v0
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vmv1r.v v0, v16
@@ -1322,10 +1324,12 @@
; RV32: # %bb.0:
; RV32-NEXT: li a3, 32
; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
-; RV32-NEXT: lui a3, 341
-; RV32-NEXT: addi a3, a3, 1365
-; RV32-NEXT: vmv.s.x v0, a3
; RV32-NEXT: vmv.v.x v16, a1
+; RV32-NEXT: lui a1, 341
+; RV32-NEXT: addi a1, a1, 1365
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v0, a1
+; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT: vmerge.vxm v16, v16, a0, v0
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vand.vv v8, v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -49,8 +49,7 @@
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: li a0, 15
-; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv1r.v v8, v10
@@ -79,11 +78,11 @@
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 15
; CHECK-NEXT: vrgather.vv v10, v8, v12
+; CHECK-NEXT: vrsub.vi v8, v11, 7
; CHECK-NEXT: li a0, 255
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv.v.x v0, a0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; CHECK-NEXT: vrsub.vi v8, v11, 7
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -119,7 +118,7 @@
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vmv.v.x v0, a0
; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; RV32-NEXT: vrgather.vv v10, v14, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
@@ -139,7 +138,7 @@
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: vmv.v.x v0, a0
; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; RV64-NEXT: vrgather.vv v10, v14, v8, v0.t
; RV64-NEXT: vmv.v.v v8, v10
@@ -195,8 +194,7 @@
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: li a0, 15
-; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
@@ -222,13 +220,15 @@
; CHECK-LABEL: v8i16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 15
; CHECK-NEXT: vrgather.vv v10, v8, v16
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrsub.vi v8, v14, 7
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -265,7 +265,7 @@
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -1
; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vmv.v.x v0, a0
; RV32-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; RV32-NEXT: vrgather.vv v8, v16, v12, v0.t
; RV32-NEXT: ret
@@ -285,7 +285,7 @@
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: vmv.v.x v0, a0
; RV64-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; RV64-NEXT: vrgather.vv v8, v16, v12, v0.t
; RV64-NEXT: ret
@@ -337,13 +337,14 @@
; CHECK-LABEL: v4i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 7
; CHECK-NEXT: vrgather.vv v10, v8, v16
-; CHECK-NEXT: li a0, 15
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrsub.vi v8, v14, 3
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -368,13 +369,15 @@
; CHECK-LABEL: v8i32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv2r.v v16, v10
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vid.v v20
; CHECK-NEXT: vrsub.vi v24, v20, 15
; CHECK-NEXT: vrgather.vv v12, v8, v24
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrsub.vi v8, v20, 7
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
@@ -401,7 +404,7 @@
; RV32-NEXT: lui a0, %hi(.LCPI23_0)
; RV32-NEXT: addi a0, a0, %lo(.LCPI23_0)
; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vle32.v v0, (a0)
; RV32-NEXT: vmv4r.v v24, v12
; RV32-NEXT: vmv4r.v v16, v8
@@ -410,7 +413,9 @@
; RV32-NEXT: vrsub.vi v16, v16, 15
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v0, a0
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t
; RV32-NEXT: ret
;
@@ -419,7 +424,7 @@
; RV64-NEXT: lui a0, %hi(.LCPI23_0)
; RV64-NEXT: addi a0, a0, %lo(.LCPI23_0)
; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV64-NEXT: vle32.v v0, (a0)
; RV64-NEXT: vmv4r.v v24, v12
; RV64-NEXT: vmv4r.v v16, v8
@@ -428,7 +433,9 @@
; RV64-NEXT: vrsub.vi v16, v16, 15
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v0, a0
+; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t
; RV64-NEXT: ret
%v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32>
@@ -494,10 +501,9 @@
; RV32-NEXT: vrsub.vi v19, v18, 7
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v8, v19
-; RV32-NEXT: li a0, 15
-; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vrsub.vi v8, v18, 3
+; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
@@ -506,13 +512,14 @@
; RV64-LABEL: v4i64_2:
; RV64: # %bb.0:
; RV64-NEXT: vmv2r.v v16, v10
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vid.v v20
; RV64-NEXT: vrsub.vi v24, v20, 7
; RV64-NEXT: vrgather.vv v12, v8, v24
-; RV64-NEXT: li a0, 15
-; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vrsub.vi v8, v20, 3
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.i v0, 15
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
@@ -567,8 +574,7 @@
; CHECK-NEXT: vid.v v11
; CHECK-NEXT: vrsub.vi v12, v11, 7
; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: li a0, 15
-; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv.v.i v0, 15
; CHECK-NEXT: vrsub.vi v8, v11, 3
; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
@@ -594,13 +600,15 @@
; CHECK-LABEL: v8f16_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
-; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 15
; CHECK-NEXT: vrgather.vv v10, v8, v16
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrsub.vi v8, v14, 7
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -683,13 +691,14 @@
; CHECK-LABEL: v4f32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv1r.v v12, v9
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vid.v v14
; CHECK-NEXT: vrsub.vi v16, v14, 7
; CHECK-NEXT: vrgather.vv v10, v8, v16
-; CHECK-NEXT: li a0, 15
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrsub.vi v8, v14, 3
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 15
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
@@ -714,13 +723,15 @@
; CHECK-LABEL: v8f32_2:
; CHECK: # %bb.0:
; CHECK-NEXT: vmv2r.v v16, v10
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT: vid.v v20
; CHECK-NEXT: vrsub.vi v24, v20, 15
; CHECK-NEXT: vrgather.vv v12, v8, v24
-; CHECK-NEXT: li a0, 255
-; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vrsub.vi v8, v20, 7
+; CHECK-NEXT: li a0, 255
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
@@ -787,10 +798,9 @@
; RV32-NEXT: vrsub.vi v19, v18, 7
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; RV32-NEXT: vrgatherei16.vv v12, v8, v19
-; RV32-NEXT: li a0, 15
-; RV32-NEXT: vmv.s.x v0, a0
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV32-NEXT: vrsub.vi v8, v18, 3
+; RV32-NEXT: vmv.v.i v0, 15
; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t
; RV32-NEXT: vmv.v.v v8, v12
@@ -799,13 +809,14 @@
; RV64-LABEL: v4f64_2:
; RV64: # %bb.0:
; RV64-NEXT: vmv2r.v v16, v10
-; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT: vid.v v20
; RV64-NEXT: vrsub.vi v24, v20, 7
; RV64-NEXT: vrgather.vv v12, v8, v24
-; RV64-NEXT: li a0, 15
-; RV64-NEXT: vmv.s.x v0, a0
; RV64-NEXT: vrsub.vi v8, v20, 3
+; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.i v0, 15
+; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t
; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -17,12 +17,12 @@
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 1, v0
+; RV32-NEXT: vadd.vi v12, v11, -16
; RV32-NEXT: lui a0, 16
; RV32-NEXT: addi a0, a0, -256
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
+; RV32-NEXT: vmv.v.x v0, a0
; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT: vadd.vi v12, v11, -16
; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t
; RV32-NEXT: vmsne.vi v9, v9, 0
; RV32-NEXT: vadd.vi v12, v11, 1
@@ -45,12 +45,12 @@
; RV64-NEXT: vslidedown.vi v0, v0, 2
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, 1, v0
+; RV64-NEXT: vadd.vi v12, v11, -16
; RV64-NEXT: lui a0, 16
; RV64-NEXT: addiw a0, a0, -256
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
+; RV64-NEXT: vmv.v.x v0, a0
; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vadd.vi v12, v11, -16
; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t
; RV64-NEXT: vmsne.vi v9, v9, 0
; RV64-NEXT: vadd.vi v12, v11, 1
@@ -109,8 +109,8 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
@@ -196,8 +196,8 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: li a0, 2
-; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; CHECK-NEXT: vmv.v.i v0, 2
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -662,7 +662,9 @@
; RV32MV-NEXT: vslide1down.vx v8, v8, a1
; RV32MV-NEXT: vslidedown.vi v8, v8, 2
; RV32MV-NEXT: li a0, 85
-; RV32MV-NEXT: vmv.s.x v0, a0
+; RV32MV-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; RV32MV-NEXT: vmv.v.x v0, a0
+; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32MV-NEXT: vmv.v.i v10, 1
; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0
; RV32MV-NEXT: vand.vv v8, v8, v10