diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3186,7 +3186,24 @@ return Vec; } - return SDValue(); + // For constant vectors, use generic constant pool lowering. Otherwise, + // we'd have to materialize constants in GPRs just to move them into the + // vector. + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) + return SDValue(); + + // We can use a series of vslide1down instructions to move values in GPRs + // into the appropriate place in the result vector. We use slide1down + // to avoid the register group overlap constraint of vslide1up. + if (VT.isFloatingPoint()) + // TODO: Use vfslide1down. + return SDValue(); + + SDValue Vec = DAG.getUNDEF(ContainerVT); + for (const SDValue &V : Op->ops()) + Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT, + DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL); + return convertFromScalableVector(VT, Vec, DAG, Subtarget); } static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll @@ -34,29 +34,23 @@ define <4 x i1> @reverse_v4i1(<4 x i1> %a) { ; CHECK-LABEL: reverse_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vslidedown.vi v9, v8, 1 -; CHECK-NEXT: addi a0, sp, 14 -; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; CHECK-NEXT: vse8.v v9, (a0) -; CHECK-NEXT: vslidedown.vi v9, v8, 2 -; CHECK-NEXT: addi a0, sp, 13 -; CHECK-NEXT: vse8.v v9, (a0) -; CHECK-NEXT: vslidedown.vi v8, v8, 3 -; CHECK-NEXT: addi a0, sp, 12 -; CHECK-NEXT: vse8.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vfirst.m a1, v0 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: sb a1, 15(sp) -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslidedown.vi v9, v8, 3 +; CHECK-NEXT: vmv.x.s a0, v9 +; CHECK-NEXT: vslide1down.vx v9, v8, a0 +; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vmv.x.s a0, v10 +; CHECK-NEXT: vslide1down.vx v9, v9, a0 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vslide1down.vx v8, v9, a0 +; CHECK-NEXT: vfirst.m a0, v0 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %res = call <4 x i1> @llvm.experimental.vector.reverse.v4i1(<4 x i1> %a) ret <4 x i1> %res @@ -65,224 +59,200 @@ define <8 x i1> @reverse_v8i1(<8 x i1> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v8i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 -; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 -; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0 -; RV32-BITS-UNKNOWN-NEXT: sb a0, 15(sp) +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 ; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 30 ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a0, 8(sp) -; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 8 -; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v8i1: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: addi sp, sp, -16 -; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 -; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-BITS-256-NEXT: vfirst.m a0, v0 -; RV32-BITS-256-NEXT: seqz a0, a0 -; RV32-BITS-256-NEXT: sb a0, 15(sp) +; RV32-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, ma ; RV32-BITS-256-NEXT: vmv.x.s a0, v0 -; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: slli a1, a0, 24 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 14(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 25 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 13(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 26 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 12(sp) +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 ; RV32-BITS-256-NEXT: slli a1, a0, 27 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 11(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 28 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 10(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 29 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 9(sp) -; RV32-BITS-256-NEXT: slli a0, a0, 24 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a0, a0, 30 ; RV32-BITS-256-NEXT: srli a0, a0, 31 -; RV32-BITS-256-NEXT: sb a0, 8(sp) -; RV32-BITS-256-NEXT: addi a0, sp, 8 -; RV32-BITS-256-NEXT: vle8.v v8, (a0) +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: vfirst.m a0, v0 +; RV32-BITS-256-NEXT: seqz a0, a0 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-256-NEXT: addi sp, sp, 16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v8i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: addi sp, sp, -16 -; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 -; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-BITS-512-NEXT: vfirst.m a0, v0 -; RV32-BITS-512-NEXT: seqz a0, a0 -; RV32-BITS-512-NEXT: sb a0, 15(sp) +; RV32-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, ma ; RV32-BITS-512-NEXT: vmv.x.s a0, v0 -; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: slli a1, a0, 24 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 14(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 25 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 13(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 26 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 12(sp) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 ; RV32-BITS-512-NEXT: slli a1, a0, 27 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 11(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 28 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 10(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 29 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 9(sp) -; RV32-BITS-512-NEXT: slli a0, a0, 24 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a0, a0, 30 ; RV32-BITS-512-NEXT: srli a0, a0, 31 -; RV32-BITS-512-NEXT: sb a0, 8(sp) -; RV32-BITS-512-NEXT: addi a0, sp, 8 -; RV32-BITS-512-NEXT: vle8.v v8, (a0) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: vfirst.m a0, v0 +; RV32-BITS-512-NEXT: seqz a0, a0 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-512-NEXT: addi sp, sp, 16 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_v8i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 -; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 -; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0 -; RV64-BITS-UNKNOWN-NEXT: sb a0, 15(sp) +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e8, mf8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp) +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 ; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62 ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a0, 8(sp) -; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 8 -; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_v8i1: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: addi sp, sp, -16 -; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 -; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-BITS-256-NEXT: vfirst.m a0, v0 -; RV64-BITS-256-NEXT: seqz a0, a0 -; RV64-BITS-256-NEXT: sb a0, 15(sp) +; RV64-BITS-256-NEXT: vsetivli zero, 0, e8, mf8, ta, ma ; RV64-BITS-256-NEXT: vmv.x.s a0, v0 -; RV64-BITS-256-NEXT: slli a1, a0, 62 +; RV64-BITS-256-NEXT: slli a1, a0, 56 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 14(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 57 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 13(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 58 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 12(sp) +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 ; RV64-BITS-256-NEXT: slli a1, a0, 59 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 11(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 60 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 10(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 61 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 9(sp) -; RV64-BITS-256-NEXT: slli a0, a0, 56 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a0, a0, 62 ; RV64-BITS-256-NEXT: srli a0, a0, 63 -; RV64-BITS-256-NEXT: sb a0, 8(sp) -; RV64-BITS-256-NEXT: addi a0, sp, 8 -; RV64-BITS-256-NEXT: vle8.v v8, (a0) +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-256-NEXT: vfirst.m a0, v0 +; RV64-BITS-256-NEXT: seqz a0, a0 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-256-NEXT: addi sp, sp, 16 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_v8i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: addi sp, sp, -16 -; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 -; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-BITS-512-NEXT: vfirst.m a0, v0 -; RV64-BITS-512-NEXT: seqz a0, a0 -; RV64-BITS-512-NEXT: sb a0, 15(sp) +; RV64-BITS-512-NEXT: vsetivli zero, 0, e8, mf8, ta, ma ; RV64-BITS-512-NEXT: vmv.x.s a0, v0 -; RV64-BITS-512-NEXT: slli a1, a0, 62 +; RV64-BITS-512-NEXT: slli a1, a0, 56 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 14(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 57 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 13(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 58 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 12(sp) +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 ; RV64-BITS-512-NEXT: slli a1, a0, 59 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 11(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 60 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 10(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 61 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 9(sp) -; RV64-BITS-512-NEXT: slli a0, a0, 56 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a0, a0, 62 ; RV64-BITS-512-NEXT: srli a0, a0, 63 -; RV64-BITS-512-NEXT: sb a0, 8(sp) -; RV64-BITS-512-NEXT: addi a0, sp, 8 -; RV64-BITS-512-NEXT: vle8.v v8, (a0) +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vfirst.m a0, v0 +; RV64-BITS-512-NEXT: seqz a0, a0 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-512-NEXT: addi sp, sp, 16 ; RV64-BITS-512-NEXT: ret %res = call <8 x i1> @llvm.experimental.vector.reverse.v8i1(<8 x i1> %a) ret <8 x i1> %res @@ -291,380 +261,344 @@ define <16 x i1> @reverse_v16i1(<16 x i1> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v16i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 -; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 -; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0 -; RV32-BITS-UNKNOWN-NEXT: sb a0, 15(sp) ; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 14(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 13(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 12(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 11(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 10(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 9(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 ; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 7(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 6(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 5(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 4(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 3(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 2(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 30 ; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a0, 0(sp) -; RV32-BITS-UNKNOWN-NEXT: mv a0, sp -; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v16i1: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: addi sp, sp, -16 -; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 -; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-BITS-256-NEXT: vfirst.m a0, v0 -; RV32-BITS-256-NEXT: seqz a0, a0 -; RV32-BITS-256-NEXT: sb a0, 15(sp) ; RV32-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV32-BITS-256-NEXT: vmv.x.s a0, v0 -; RV32-BITS-256-NEXT: slli a1, a0, 30 +; RV32-BITS-256-NEXT: slli a1, a0, 16 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 14(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 29 +; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 17 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 13(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 18 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 12(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 19 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 11(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 20 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 10(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 21 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 9(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 24 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 22 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 8(sp) +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 ; RV32-BITS-256-NEXT: slli a1, a0, 23 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 7(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 22 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 24 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 6(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 21 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 25 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 5(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 20 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 26 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 4(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 19 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 27 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 3(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 18 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 28 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 2(sp) -; RV32-BITS-256-NEXT: slli a1, a0, 17 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 29 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 1(sp) -; RV32-BITS-256-NEXT: slli a0, a0, 16 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a0, a0, 30 ; RV32-BITS-256-NEXT: srli a0, a0, 31 -; RV32-BITS-256-NEXT: sb a0, 0(sp) -; RV32-BITS-256-NEXT: mv a0, sp -; RV32-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-BITS-256-NEXT: vle8.v v8, (a0) +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: vfirst.m a0, v0 +; RV32-BITS-256-NEXT: seqz a0, a0 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-256-NEXT: addi sp, sp, 16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v16i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: addi sp, sp, -16 -; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 -; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-BITS-512-NEXT: vfirst.m a0, v0 -; RV32-BITS-512-NEXT: seqz a0, a0 -; RV32-BITS-512-NEXT: sb a0, 15(sp) ; RV32-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV32-BITS-512-NEXT: vmv.x.s a0, v0 -; RV32-BITS-512-NEXT: slli a1, a0, 30 +; RV32-BITS-512-NEXT: slli a1, a0, 16 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 14(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 17 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 13(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 18 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 12(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 19 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 11(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 20 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 10(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 21 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 9(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 24 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 22 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 8(sp) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 ; RV32-BITS-512-NEXT: slli a1, a0, 23 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 7(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 22 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 24 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 6(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 21 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 25 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 5(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 20 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 26 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 4(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 19 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 27 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 3(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 18 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 28 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 2(sp) -; RV32-BITS-512-NEXT: slli a1, a0, 17 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 29 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 1(sp) -; RV32-BITS-512-NEXT: slli a0, a0, 16 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a0, a0, 30 ; RV32-BITS-512-NEXT: srli a0, a0, 31 -; RV32-BITS-512-NEXT: sb a0, 0(sp) -; RV32-BITS-512-NEXT: mv a0, sp -; RV32-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-BITS-512-NEXT: vle8.v v8, (a0) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: vfirst.m a0, v0 +; RV32-BITS-512-NEXT: seqz a0, a0 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-512-NEXT: addi sp, sp, 16 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_v16i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 -; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 -; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0 -; RV64-BITS-UNKNOWN-NEXT: sb a0, 15(sp) ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 14(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 13(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 12(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 11(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 10(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 9(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 8(sp) +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 ; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 7(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 6(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 5(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 4(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 3(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 2(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 ; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 48 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62 ; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a0, 0(sp) -; RV64-BITS-UNKNOWN-NEXT: mv a0, sp -; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a0) +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_v16i1: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: addi sp, sp, -16 -; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 -; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-BITS-256-NEXT: vfirst.m a0, v0 -; RV64-BITS-256-NEXT: seqz a0, a0 -; RV64-BITS-256-NEXT: sb a0, 15(sp) ; RV64-BITS-256-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV64-BITS-256-NEXT: vmv.x.s a0, v0 -; RV64-BITS-256-NEXT: slli a1, a0, 62 +; RV64-BITS-256-NEXT: slli a1, a0, 48 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 14(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 49 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 13(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 50 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 12(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 51 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 11(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 52 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 10(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 53 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 9(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 56 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 54 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 8(sp) +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 ; RV64-BITS-256-NEXT: slli a1, a0, 55 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 7(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 54 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 56 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 6(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 53 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 57 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 5(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 52 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 58 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 4(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 51 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 59 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 3(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 50 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 60 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 2(sp) -; RV64-BITS-256-NEXT: slli a1, a0, 49 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 61 ; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 1(sp) -; RV64-BITS-256-NEXT: slli a0, a0, 48 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a0, a0, 62 ; RV64-BITS-256-NEXT: srli a0, a0, 63 -; RV64-BITS-256-NEXT: sb a0, 0(sp) -; RV64-BITS-256-NEXT: mv a0, sp -; RV64-BITS-256-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-BITS-256-NEXT: vle8.v v8, (a0) +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-256-NEXT: vfirst.m a0, v0 +; RV64-BITS-256-NEXT: seqz a0, a0 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-256-NEXT: addi sp, sp, 16 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_v16i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: addi sp, sp, -16 -; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 -; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-BITS-512-NEXT: vfirst.m a0, v0 -; RV64-BITS-512-NEXT: seqz a0, a0 -; RV64-BITS-512-NEXT: sb a0, 15(sp) ; RV64-BITS-512-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV64-BITS-512-NEXT: vmv.x.s a0, v0 -; RV64-BITS-512-NEXT: slli a1, a0, 62 -; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 14(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: slli a1, a0, 48 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 13(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 49 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 12(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 50 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 11(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 51 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 10(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 52 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 9(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 56 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 54 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 8(sp) +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 ; RV64-BITS-512-NEXT: slli a1, a0, 55 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 7(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 54 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 56 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 6(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 57 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 5(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 52 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 58 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 4(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 51 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 59 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 3(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 50 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 60 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 2(sp) -; RV64-BITS-512-NEXT: slli a1, a0, 49 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 61 ; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 1(sp) -; RV64-BITS-512-NEXT: slli a0, a0, 48 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a0, a0, 62 ; RV64-BITS-512-NEXT: srli a0, a0, 63 -; RV64-BITS-512-NEXT: sb a0, 0(sp) -; RV64-BITS-512-NEXT: mv a0, sp -; RV64-BITS-512-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-BITS-512-NEXT: vle8.v v8, (a0) +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vfirst.m a0, v0 +; RV64-BITS-512-NEXT: seqz a0, a0 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-512-NEXT: addi sp, sp, 16 ; RV64-BITS-512-NEXT: ret %res = call <16 x i1> @llvm.experimental.vector.reverse.v16i1(<16 x i1> %a) ret <16 x i1> %res @@ -673,728 +607,632 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v32i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -64 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 64 -; RV32-BITS-UNKNOWN-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-BITS-UNKNOWN-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4 -; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8 -; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 64 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -32 -; RV32-BITS-UNKNOWN-NEXT: li a0, 32 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vfirst.m a1, v0 -; RV32-BITS-UNKNOWN-NEXT: seqz a1, a1 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 31(sp) ; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 0(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 30(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 29(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 28(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 27(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 26(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 25(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 24(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 23(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 22(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 21(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 20(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 19(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 18(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 17(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 16(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 15(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 14(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 13(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 12(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 11(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 10(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 9(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 8(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 7(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 6(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 5(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 4(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 3(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 2(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1 +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: li a2, 32 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 6 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 7 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 8 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 9 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 10 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 11 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 12 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 13 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 14 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 15 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 28 ; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp) -; RV32-BITS-UNKNOWN-NEXT: mv a1, sp -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -64 -; RV32-BITS-UNKNOWN-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-BITS-UNKNOWN-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 64 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v32i1: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: addi sp, sp, -64 -; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 64 -; RV32-BITS-256-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-BITS-256-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-BITS-256-NEXT: .cfi_offset ra, -4 -; RV32-BITS-256-NEXT: .cfi_offset s0, -8 -; RV32-BITS-256-NEXT: addi s0, sp, 64 -; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-256-NEXT: andi sp, sp, -32 -; RV32-BITS-256-NEXT: li a0, 32 -; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-BITS-256-NEXT: vfirst.m a1, v0 -; RV32-BITS-256-NEXT: seqz a1, a1 -; RV32-BITS-256-NEXT: sb a1, 31(sp) ; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; RV32-BITS-256-NEXT: vmv.x.s a1, v0 -; RV32-BITS-256-NEXT: srli a2, a1, 31 -; RV32-BITS-256-NEXT: sb a2, 0(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 30 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 30(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 29 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 29(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 28 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 28(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 27 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 27(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 26 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 26(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 25 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 25(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 24 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 24(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 23 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 23(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 22 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 22(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 21 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 21(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 20 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 20(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 19 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 19(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 18 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 18(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 17 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 17(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 16 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 16(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 15 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 15(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 14 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 14(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 13 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 13(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 12 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 12(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 11 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 11(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 10 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 10(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 9 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 9(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 8 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 8(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 7 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 7(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 6 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 6(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 5 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 5(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 4 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 4(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 3 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 3(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 2 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 2(sp) -; RV32-BITS-256-NEXT: slli a1, a1, 1 +; RV32-BITS-256-NEXT: vmv.x.s a0, v0 +; RV32-BITS-256-NEXT: srli a1, a0, 31 +; RV32-BITS-256-NEXT: li a2, 32 +; RV32-BITS-256-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 4 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 5 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 6 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 7 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 8 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 9 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 10 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 11 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 12 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 13 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 14 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 15 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 16 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 17 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 18 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 19 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 20 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 21 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 22 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 23 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 24 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 25 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 26 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 27 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 28 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a1, a0, 29 ; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 1(sp) -; RV32-BITS-256-NEXT: mv a1, sp -; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-BITS-256-NEXT: vle8.v v8, (a1) +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: slli a0, a0, 30 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: vfirst.m a0, v0 +; RV32-BITS-256-NEXT: seqz a0, a0 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-256-NEXT: addi sp, s0, -64 -; RV32-BITS-256-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-BITS-256-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-BITS-256-NEXT: addi sp, sp, 64 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v32i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: addi sp, sp, -64 -; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 64 -; RV32-BITS-512-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-BITS-512-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-BITS-512-NEXT: .cfi_offset ra, -4 -; RV32-BITS-512-NEXT: .cfi_offset s0, -8 -; RV32-BITS-512-NEXT: addi s0, sp, 64 -; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-512-NEXT: andi sp, sp, -32 -; RV32-BITS-512-NEXT: li a0, 32 -; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-BITS-512-NEXT: vfirst.m a1, v0 -; RV32-BITS-512-NEXT: seqz a1, a1 -; RV32-BITS-512-NEXT: sb a1, 31(sp) ; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; RV32-BITS-512-NEXT: vmv.x.s a1, v0 -; RV32-BITS-512-NEXT: srli a2, a1, 31 -; RV32-BITS-512-NEXT: sb a2, 0(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 30 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 30(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 29 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 29(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 28 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 28(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 27 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 27(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 26 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 26(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 25 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 25(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 24 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 24(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 23 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 23(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 22 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 22(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 21 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 21(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 20 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 20(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 19 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 19(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 18 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 18(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 17 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 17(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 16 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 16(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 15 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 15(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 14 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 14(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 13 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 13(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 12 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 12(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 11 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 11(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 10 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 10(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 9 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 9(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 8 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 8(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 7 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 7(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 6 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 6(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 5 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 5(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 4 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 4(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 3 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 3(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 2 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 2(sp) -; RV32-BITS-512-NEXT: slli a1, a1, 1 +; RV32-BITS-512-NEXT: vmv.x.s a0, v0 +; RV32-BITS-512-NEXT: srli a1, a0, 31 +; RV32-BITS-512-NEXT: li a2, 32 +; RV32-BITS-512-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 4 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 5 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 6 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 7 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 8 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 9 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 10 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 11 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 12 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 13 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 14 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 15 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 16 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 17 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 18 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 19 ; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 1(sp) -; RV32-BITS-512-NEXT: mv a1, sp -; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-BITS-512-NEXT: vle8.v v8, (a1) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 20 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 21 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 22 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 23 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 24 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 25 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 26 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 27 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 28 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a1, a0, 29 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: slli a0, a0, 30 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: vfirst.m a0, v0 +; RV32-BITS-512-NEXT: seqz a0, a0 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-512-NEXT: addi sp, s0, -64 -; RV32-BITS-512-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-BITS-512-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-BITS-512-NEXT: addi sp, sp, 64 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_v32i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -64 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 64 -; RV64-BITS-UNKNOWN-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-BITS-UNKNOWN-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8 -; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16 -; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 64 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -32 -; RV64-BITS-UNKNOWN-NEXT: li a0, 32 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vfirst.m a1, v0 -; RV64-BITS-UNKNOWN-NEXT: seqz a1, a1 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 31(sp) ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0 -; RV64-BITS-UNKNOWN-NEXT: srliw a2, a1, 31 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 0(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 62 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 30(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 61 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 29(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 60 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 28(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 59 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 27(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 58 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 26(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 57 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 25(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 56 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 24(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 55 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 23(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 54 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 22(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 53 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 21(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 52 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 20(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 51 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 19(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 50 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 18(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 49 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 17(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 48 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 16(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 47 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 15(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 46 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 14(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 45 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 13(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 44 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 12(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 43 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 11(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 42 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 10(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 41 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 9(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 40 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 8(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 39 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 7(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 38 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 6(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 37 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 5(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 36 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 4(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 35 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 3(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 34 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 2(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a1, 33 -; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp) -; RV64-BITS-UNKNOWN-NEXT: mv a1, sp -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31 +; RV64-BITS-UNKNOWN-NEXT: li a2, 32 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 33 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -64 -; RV64-BITS-UNKNOWN-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-BITS-UNKNOWN-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 64 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_v32i1: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: addi sp, sp, -64 -; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 64 -; RV64-BITS-256-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-BITS-256-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-BITS-256-NEXT: .cfi_offset ra, -8 -; RV64-BITS-256-NEXT: .cfi_offset s0, -16 -; RV64-BITS-256-NEXT: addi s0, sp, 64 -; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-256-NEXT: andi sp, sp, -32 -; RV64-BITS-256-NEXT: li a0, 32 -; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-BITS-256-NEXT: vfirst.m a1, v0 -; RV64-BITS-256-NEXT: seqz a1, a1 -; RV64-BITS-256-NEXT: sb a1, 31(sp) ; RV64-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; RV64-BITS-256-NEXT: vmv.x.s a1, v0 -; RV64-BITS-256-NEXT: srliw a2, a1, 31 -; RV64-BITS-256-NEXT: sb a2, 0(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 62 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 30(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 61 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 29(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 60 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 28(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 59 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 27(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 58 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 26(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 57 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 25(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 56 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 24(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 55 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 23(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 54 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 22(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 53 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 21(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 52 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 20(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 51 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 19(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 50 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 18(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 49 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 17(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 48 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 16(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 47 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 15(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 46 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 14(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 45 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 13(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 44 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 12(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 43 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 11(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 42 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 10(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 41 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 9(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 40 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 8(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 39 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 7(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 38 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 6(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 37 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 5(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 36 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 4(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 35 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 3(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 34 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 2(sp) -; RV64-BITS-256-NEXT: slli a1, a1, 33 -; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 1(sp) -; RV64-BITS-256-NEXT: mv a1, sp -; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-BITS-256-NEXT: vle8.v v8, (a1) +; RV64-BITS-256-NEXT: vmv.x.s a0, v0 +; RV64-BITS-256-NEXT: srliw a1, a0, 31 +; RV64-BITS-256-NEXT: li a2, 32 +; RV64-BITS-256-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 33 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 34 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 35 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 36 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 37 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 38 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 39 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 40 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 41 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 42 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 43 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 44 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 45 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 46 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 47 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 48 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 49 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 50 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 51 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 52 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 53 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 54 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 55 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 56 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a0, a0, 62 +; RV64-BITS-256-NEXT: srli a0, a0, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-256-NEXT: vfirst.m a0, v0 +; RV64-BITS-256-NEXT: seqz a0, a0 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-256-NEXT: addi sp, s0, -64 -; RV64-BITS-256-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-BITS-256-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-BITS-256-NEXT: addi sp, sp, 64 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_v32i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: addi sp, sp, -64 -; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 64 -; RV64-BITS-512-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-BITS-512-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-BITS-512-NEXT: .cfi_offset ra, -8 -; RV64-BITS-512-NEXT: .cfi_offset s0, -16 -; RV64-BITS-512-NEXT: addi s0, sp, 64 -; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-512-NEXT: andi sp, sp, -32 -; RV64-BITS-512-NEXT: li a0, 32 -; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-BITS-512-NEXT: vfirst.m a1, v0 -; RV64-BITS-512-NEXT: seqz a1, a1 -; RV64-BITS-512-NEXT: sb a1, 31(sp) ; RV64-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma -; RV64-BITS-512-NEXT: vmv.x.s a1, v0 -; RV64-BITS-512-NEXT: srliw a2, a1, 31 -; RV64-BITS-512-NEXT: sb a2, 0(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 62 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 30(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 61 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 29(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 60 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 28(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 59 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 27(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 58 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 26(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 57 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 25(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 56 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 24(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 55 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 23(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 54 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 22(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 53 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 21(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 52 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 20(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 51 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 19(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 50 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 18(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 49 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 17(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 48 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 16(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 47 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 15(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 46 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 14(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 45 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 13(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 44 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 12(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 43 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 11(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 42 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 10(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 41 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 9(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 40 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 8(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 39 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 7(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 38 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 6(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 37 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 5(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 36 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 4(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 35 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 3(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 34 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 2(sp) -; RV64-BITS-512-NEXT: slli a1, a1, 33 -; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 1(sp) -; RV64-BITS-512-NEXT: mv a1, sp -; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-BITS-512-NEXT: vle8.v v8, (a1) +; RV64-BITS-512-NEXT: vmv.x.s a0, v0 +; RV64-BITS-512-NEXT: srliw a1, a0, 31 +; RV64-BITS-512-NEXT: li a2, 32 +; RV64-BITS-512-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 33 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 34 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 35 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 36 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 37 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 38 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 39 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 40 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 41 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 42 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 43 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 44 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 45 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 46 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 47 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 48 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 49 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 50 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 51 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 52 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 54 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 55 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 56 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a0, a0, 62 +; RV64-BITS-512-NEXT: srli a0, a0, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vfirst.m a0, v0 +; RV64-BITS-512-NEXT: seqz a0, a0 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-512-NEXT: addi sp, s0, -64 -; RV64-BITS-512-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-BITS-512-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-BITS-512-NEXT: addi sp, sp, 64 ; RV64-BITS-512-NEXT: ret %res = call <32 x i1> @llvm.experimental.vector.reverse.v32i1(<32 x i1> %a) ret <32 x i1> %res @@ -1403,1301 +1241,1211 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v64i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -128 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 128 -; RV32-BITS-UNKNOWN-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32-BITS-UNKNOWN-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4 -; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8 -; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 128 -; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64 -; RV32-BITS-UNKNOWN-NEXT: li a0, 64 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vfirst.m a1, v0 -; RV32-BITS-UNKNOWN-NEXT: seqz a1, a1 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 63(sp) ; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0 +; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1 +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v8 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 32(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30 +; RV32-BITS-UNKNOWN-NEXT: li a0, 64 +; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 1 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 62(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 61(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 60(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 59(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 58(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 57(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 56(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 55(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 54(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 53(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 52(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 51(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 50(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 49(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 48(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 47(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 46(sp) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 ; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 45(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 44(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 43(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 42(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 41(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 40(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 39(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 38(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 37(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 36(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 35(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 34(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1 -; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 33(sp) -; RV32-BITS-UNKNOWN-NEXT: vslidedown.vi v8, v0, 1 -; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v8 -; RV32-BITS-UNKNOWN-NEXT: andi a2, a1, 1 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 31(sp) -; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 0(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 30(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 29(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 28(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 27(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 26(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 25(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 24(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 23(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 22(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 21(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 20(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19 -; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 19(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 18(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 17(sp) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 ; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 16 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 16(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 15 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 17 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 15(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 14 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 18 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 14(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 13 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 19 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 13(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 12 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 20 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 12(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 11 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 21 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 11(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 10 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 22 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 10(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 9 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 23 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 9(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 8 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 24 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 8(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 7 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 25 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 7(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 6 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 26 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 6(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 5 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 27 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 5(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 4 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 28 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 4(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 3 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 29 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 3(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 2 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a2, a1, 30 ; RV32-BITS-UNKNOWN-NEXT: srli a2, a2, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a2, 2(sp) -; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 1 -; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 -; RV32-BITS-UNKNOWN-NEXT: sb a1, 1(sp) -; RV32-BITS-UNKNOWN-NEXT: mv a1, sp +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: andi a1, a1, 1 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0 +; RV32-BITS-UNKNOWN-NEXT: srli a2, a1, 31 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 1 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 2 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 3 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 4 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 5 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 6 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 7 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 8 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 9 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 10 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 11 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 12 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 13 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 14 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 15 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 16 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 17 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 18 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 19 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 20 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 21 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 22 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 23 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 24 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 25 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 26 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 27 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 28 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a0, a1, 29 +; RV32-BITS-UNKNOWN-NEXT: srli a0, a0, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: slli a1, a1, 30 +; RV32-BITS-UNKNOWN-NEXT: srli a1, a1, 31 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV32-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV32-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -128 -; RV32-BITS-UNKNOWN-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32-BITS-UNKNOWN-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 128 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v64i1: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: addi sp, sp, -128 -; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 128 -; RV32-BITS-256-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32-BITS-256-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32-BITS-256-NEXT: .cfi_offset ra, -4 -; RV32-BITS-256-NEXT: .cfi_offset s0, -8 -; RV32-BITS-256-NEXT: addi s0, sp, 128 -; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-256-NEXT: andi sp, sp, -64 -; RV32-BITS-256-NEXT: li a0, 64 -; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-BITS-256-NEXT: vfirst.m a1, v0 -; RV32-BITS-256-NEXT: seqz a1, a1 -; RV32-BITS-256-NEXT: sb a1, 63(sp) ; RV32-BITS-256-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-BITS-256-NEXT: vmv.x.s a1, v0 -; RV32-BITS-256-NEXT: srli a2, a1, 31 -; RV32-BITS-256-NEXT: sb a2, 32(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 30 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 62(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 29 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 61(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 28 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 60(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 27 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 59(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 26 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 58(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 25 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 57(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 24 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 56(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 23 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 55(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 22 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 54(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 21 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 53(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 20 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 52(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 19 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 51(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 18 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 50(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 17 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 49(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 16 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 48(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 15 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 47(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 14 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 46(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 13 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 45(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 12 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 44(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 11 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 43(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 10 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 42(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 9 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 41(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 8 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 40(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 7 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 39(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 6 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 38(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 5 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 37(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 4 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 36(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 3 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 35(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 2 -; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 34(sp) -; RV32-BITS-256-NEXT: slli a1, a1, 1 -; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 33(sp) ; RV32-BITS-256-NEXT: vslidedown.vi v8, v0, 1 ; RV32-BITS-256-NEXT: vmv.x.s a1, v8 -; RV32-BITS-256-NEXT: andi a2, a1, 1 -; RV32-BITS-256-NEXT: sb a2, 31(sp) ; RV32-BITS-256-NEXT: srli a2, a1, 31 -; RV32-BITS-256-NEXT: sb a2, 0(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 30 +; RV32-BITS-256-NEXT: li a0, 64 +; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 1 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 30(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 29 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 2 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 29(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 28 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 3 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 28(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 27 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 4 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 27(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 26 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 5 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 26(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 25 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 6 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 25(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 24 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 7 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 24(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 23 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 8 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 23(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 22 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 9 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 22(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 21 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 10 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 21(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 20 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 11 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 20(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 19 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 12 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 19(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 18 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 13 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 18(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 17 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 14 +; RV32-BITS-256-NEXT: srli a2, a2, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 15 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 17(sp) +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 ; RV32-BITS-256-NEXT: slli a2, a1, 16 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 16(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 15 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 17 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 15(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 14 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 18 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 14(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 13 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 19 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 13(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 12 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 20 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 12(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 11 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 21 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 11(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 10 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 22 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 10(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 9 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 23 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 9(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 8 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 24 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 8(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 7 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 25 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 7(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 6 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 26 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 6(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 5 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 27 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 5(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 4 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 28 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 4(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 3 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 29 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 3(sp) -; RV32-BITS-256-NEXT: slli a2, a1, 2 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a2, a1, 30 ; RV32-BITS-256-NEXT: srli a2, a2, 31 -; RV32-BITS-256-NEXT: sb a2, 2(sp) -; RV32-BITS-256-NEXT: slli a1, a1, 1 -; RV32-BITS-256-NEXT: srli a1, a1, 31 -; RV32-BITS-256-NEXT: sb a1, 1(sp) -; RV32-BITS-256-NEXT: mv a1, sp +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: andi a1, a1, 1 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; RV32-BITS-256-NEXT: vmv.x.s a1, v0 +; RV32-BITS-256-NEXT: srli a2, a1, 31 ; RV32-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-BITS-256-NEXT: vle8.v v8, (a1) -; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 -; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-256-NEXT: addi sp, s0, -128 -; RV32-BITS-256-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32-BITS-256-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32-BITS-256-NEXT: addi sp, sp, 128 -; RV32-BITS-256-NEXT: ret +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-256-NEXT: slli a0, a1, 1 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 2 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 3 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 4 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 5 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 6 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 7 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 8 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 9 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 10 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 11 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 12 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 13 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 14 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 15 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 16 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 17 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 18 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 19 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 20 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 21 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 22 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 23 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 24 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 25 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 26 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 27 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 28 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a0, a1, 29 +; RV32-BITS-256-NEXT: srli a0, a0, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: slli a1, a1, 30 +; RV32-BITS-256-NEXT: srli a1, a1, 31 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-256-NEXT: vfirst.m a0, v0 +; RV32-BITS-256-NEXT: seqz a0, a0 +; RV32-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-256-NEXT: vand.vi v8, v8, 1 +; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v64i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: addi sp, sp, -128 -; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 128 -; RV32-BITS-512-NEXT: sw ra, 124(sp) # 4-byte Folded Spill -; RV32-BITS-512-NEXT: sw s0, 120(sp) # 4-byte Folded Spill -; RV32-BITS-512-NEXT: .cfi_offset ra, -4 -; RV32-BITS-512-NEXT: .cfi_offset s0, -8 -; RV32-BITS-512-NEXT: addi s0, sp, 128 -; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0 -; RV32-BITS-512-NEXT: andi sp, sp, -64 -; RV32-BITS-512-NEXT: li a0, 64 -; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vfirst.m a1, v0 -; RV32-BITS-512-NEXT: seqz a1, a1 -; RV32-BITS-512-NEXT: sb a1, 63(sp) ; RV32-BITS-512-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-BITS-512-NEXT: vmv.x.s a1, v0 -; RV32-BITS-512-NEXT: srli a2, a1, 31 -; RV32-BITS-512-NEXT: sb a2, 32(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 30 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 62(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 29 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 61(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 28 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 60(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 27 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 59(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 26 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 58(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 25 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 57(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 24 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 56(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 23 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 55(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 22 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 54(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 21 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 53(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 20 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 52(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 19 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 51(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 18 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 50(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 17 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 49(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 16 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 48(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 15 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 47(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 14 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 46(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 13 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 45(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 12 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 44(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 11 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 43(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 10 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 42(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 9 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 41(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 8 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 40(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 7 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 39(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 6 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 38(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 5 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 37(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 4 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 36(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 3 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 35(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 2 -; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 34(sp) -; RV32-BITS-512-NEXT: slli a1, a1, 1 -; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 33(sp) ; RV32-BITS-512-NEXT: vslidedown.vi v8, v0, 1 ; RV32-BITS-512-NEXT: vmv.x.s a1, v8 -; RV32-BITS-512-NEXT: andi a2, a1, 1 -; RV32-BITS-512-NEXT: sb a2, 31(sp) ; RV32-BITS-512-NEXT: srli a2, a1, 31 -; RV32-BITS-512-NEXT: sb a2, 0(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 30 +; RV32-BITS-512-NEXT: li a0, 64 +; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 1 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 30(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 29 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 2 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 29(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 28 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 3 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 28(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 27 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 4 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 27(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 26 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 5 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 26(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 25 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 6 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 25(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 24 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 7 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 24(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 23 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 8 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 23(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 22 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 9 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 22(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 21 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 10 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 21(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 20 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 11 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 20(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 19 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 12 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 19(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 18 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 13 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 18(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 17 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 14 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 17(sp) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 15 +; RV32-BITS-512-NEXT: srli a2, a2, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 ; RV32-BITS-512-NEXT: slli a2, a1, 16 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 16(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 15 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 17 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 15(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 14 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 18 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 14(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 13 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 19 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 13(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 12 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 20 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 12(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 11 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 21 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 11(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 10 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 22 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 10(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 9 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 23 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 9(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 8 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 24 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 8(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 7 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 25 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 7(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 6 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 26 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 6(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 5 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 27 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 5(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 4 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 28 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 4(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 3 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 29 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 3(sp) -; RV32-BITS-512-NEXT: slli a2, a1, 2 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a2, a1, 30 ; RV32-BITS-512-NEXT: srli a2, a2, 31 -; RV32-BITS-512-NEXT: sb a2, 2(sp) -; RV32-BITS-512-NEXT: slli a1, a1, 1 -; RV32-BITS-512-NEXT: srli a1, a1, 31 -; RV32-BITS-512-NEXT: sb a1, 1(sp) -; RV32-BITS-512-NEXT: mv a1, sp +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: andi a1, a1, 1 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: vsetivli zero, 0, e32, mf2, ta, ma +; RV32-BITS-512-NEXT: vmv.x.s a1, v0 +; RV32-BITS-512-NEXT: srli a2, a1, 31 ; RV32-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vle8.v v8, (a1) +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a2 +; RV32-BITS-512-NEXT: slli a0, a1, 1 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 2 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 3 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 4 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 5 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 6 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 7 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 8 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 9 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 10 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 11 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 12 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 13 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 14 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 15 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 16 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 17 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 18 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 19 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 20 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 21 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 22 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 23 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 24 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 25 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 26 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 27 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 28 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a0, a1, 29 +; RV32-BITS-512-NEXT: srli a0, a0, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV32-BITS-512-NEXT: slli a1, a1, 30 +; RV32-BITS-512-NEXT: srli a1, a1, 31 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV32-BITS-512-NEXT: vfirst.m a0, v0 +; RV32-BITS-512-NEXT: seqz a0, a0 +; RV32-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV32-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV32-BITS-512-NEXT: addi sp, s0, -128 -; RV32-BITS-512-NEXT: lw ra, 124(sp) # 4-byte Folded Reload -; RV32-BITS-512-NEXT: lw s0, 120(sp) # 4-byte Folded Reload -; RV32-BITS-512-NEXT: addi sp, sp, 128 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_v64i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -128 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 128 -; RV64-BITS-UNKNOWN-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; RV64-BITS-UNKNOWN-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8 -; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16 -; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 128 -; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64 -; RV64-BITS-UNKNOWN-NEXT: li a0, 64 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vfirst.m a1, v0 -; RV64-BITS-UNKNOWN-NEXT: seqz a1, a1 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 63(sp) ; RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a1, v0 -; RV64-BITS-UNKNOWN-NEXT: srliw a2, a1, 31 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 32(sp) -; RV64-BITS-UNKNOWN-NEXT: srli a2, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 0(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 62 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 62(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 61 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 61(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 60 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 60(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 59 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 59(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 58 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 58(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 57 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 57(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 56 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 56(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 55 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 55(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 54 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 54(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 53 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 53(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 52 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 52(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 51 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 51(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 50 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 50(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 49 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 49(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 48 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 48(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 47 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 47(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 46 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 46(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 45 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 45(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 44 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 44(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 43 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 43(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 42 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 42(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 41 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 41(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 40 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 40(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 39 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 39(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 38 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 38(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 37 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 37(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 36 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 36(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 35 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 35(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 34 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 34(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 33 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 33(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 31 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 31(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 30 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 30(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 29 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 29(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 28 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 28(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 27 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 27(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 26 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 26(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 25 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 25(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 24 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 24(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 23 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 23(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 22 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 22(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 21 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 21(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 20 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 20(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 19 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 19(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 18 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 18(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 17 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 17(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 16 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 16(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 15 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 15(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 14 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 14(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 13 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 13(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 12 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 12(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 11 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 11(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 10 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 10(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 9 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 9(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 8 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 8(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 7 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 7(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 6 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 6(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 5 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 5(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 4 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 4(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 3 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 3(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a2, a1, 2 -; RV64-BITS-UNKNOWN-NEXT: srli a2, a2, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a2, 2(sp) -; RV64-BITS-UNKNOWN-NEXT: slli a1, a1, 1 -; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 -; RV64-BITS-UNKNOWN-NEXT: sb a1, 1(sp) -; RV64-BITS-UNKNOWN-NEXT: mv a1, sp -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vle8.v v8, (a1) +; RV64-BITS-UNKNOWN-NEXT: vmv.x.s a0, v0 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: li a2, 64 +; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 6 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 7 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 8 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 9 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 10 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 11 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 12 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 13 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 14 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 15 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 17 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 18 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 19 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 20 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 21 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 22 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 23 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 24 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 25 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 26 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 27 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 28 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 29 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 30 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 31 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: srliw a1, a0, 31 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 33 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 34 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 35 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 36 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 37 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 38 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 39 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 40 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 41 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 42 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 43 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 44 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 45 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 46 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 47 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 48 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 49 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 50 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 51 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 52 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 53 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 54 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 55 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 56 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 57 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 58 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 59 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 60 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 61 +; RV64-BITS-UNKNOWN-NEXT: srli a1, a1, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 62 +; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 63 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vfirst.m a0, v0 +; RV64-BITS-UNKNOWN-NEXT: seqz a0, a0 +; RV64-BITS-UNKNOWN-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -128 -; RV64-BITS-UNKNOWN-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; RV64-BITS-UNKNOWN-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 128 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_v64i1: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: addi sp, sp, -128 -; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 128 -; RV64-BITS-256-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; RV64-BITS-256-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; RV64-BITS-256-NEXT: .cfi_offset ra, -8 -; RV64-BITS-256-NEXT: .cfi_offset s0, -16 -; RV64-BITS-256-NEXT: addi s0, sp, 128 -; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-256-NEXT: andi sp, sp, -64 -; RV64-BITS-256-NEXT: li a0, 64 -; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-BITS-256-NEXT: vfirst.m a1, v0 -; RV64-BITS-256-NEXT: seqz a1, a1 -; RV64-BITS-256-NEXT: sb a1, 63(sp) ; RV64-BITS-256-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-BITS-256-NEXT: vmv.x.s a1, v0 -; RV64-BITS-256-NEXT: srliw a2, a1, 31 -; RV64-BITS-256-NEXT: sb a2, 32(sp) -; RV64-BITS-256-NEXT: srli a2, a1, 63 -; RV64-BITS-256-NEXT: sb a2, 0(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 62 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 62(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 61 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 61(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 60 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 60(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 59 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 59(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 58 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 58(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 57 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 57(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 56 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 56(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 55 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 55(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 54 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 54(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 53 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 53(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 52 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 52(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 51 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 51(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 50 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 50(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 49 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 49(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 48 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 48(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 47 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 47(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 46 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 46(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 45 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 45(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 44 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 44(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 43 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 43(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 42 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 42(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 41 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 41(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 40 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 40(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 39 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 39(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 38 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 38(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 37 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 37(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 36 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 36(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 35 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 35(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 34 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 34(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 33 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 33(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 31 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 31(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 30 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 30(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 29 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 29(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 28 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 28(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 27 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 27(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 26 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 26(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 25 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 25(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 24 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 24(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 23 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 23(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 22 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 22(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 21 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 21(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 20 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 20(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 19 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 19(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 18 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 18(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 17 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 17(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 16 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 16(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 15 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 15(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 14 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 14(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 13 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 13(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 12 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 12(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 11 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 11(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 10 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 10(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 9 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 9(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 8 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 8(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 7 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 7(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 6 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 6(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 5 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 5(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 4 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 4(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 3 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 3(sp) -; RV64-BITS-256-NEXT: slli a2, a1, 2 -; RV64-BITS-256-NEXT: srli a2, a2, 63 -; RV64-BITS-256-NEXT: sb a2, 2(sp) -; RV64-BITS-256-NEXT: slli a1, a1, 1 -; RV64-BITS-256-NEXT: srli a1, a1, 63 -; RV64-BITS-256-NEXT: sb a1, 1(sp) -; RV64-BITS-256-NEXT: mv a1, sp -; RV64-BITS-256-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-BITS-256-NEXT: vle8.v v8, (a1) +; RV64-BITS-256-NEXT: vmv.x.s a0, v0 +; RV64-BITS-256-NEXT: srli a1, a0, 63 +; RV64-BITS-256-NEXT: li a2, 64 +; RV64-BITS-256-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 4 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 5 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 6 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 7 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 8 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 9 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 10 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 11 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 12 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 13 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 14 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 15 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 16 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 17 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 18 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 19 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 20 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 21 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 22 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 23 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 24 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 25 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 26 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 27 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 28 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 29 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 30 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 31 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: srliw a1, a0, 31 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 33 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 34 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 35 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 36 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 37 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 38 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 39 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 40 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 41 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 42 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 43 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 44 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 45 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 46 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 47 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 48 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 49 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 50 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 51 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 52 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 53 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 54 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 55 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 56 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 57 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 58 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 59 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 60 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a1, a0, 61 +; RV64-BITS-256-NEXT: srli a1, a1, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-256-NEXT: slli a0, a0, 62 +; RV64-BITS-256-NEXT: srli a0, a0, 63 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-256-NEXT: vfirst.m a0, v0 +; RV64-BITS-256-NEXT: seqz a0, a0 +; RV64-BITS-256-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-256-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-256-NEXT: addi sp, s0, -128 -; RV64-BITS-256-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; RV64-BITS-256-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; RV64-BITS-256-NEXT: addi sp, sp, 128 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_v64i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: addi sp, sp, -128 -; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 128 -; RV64-BITS-512-NEXT: sd ra, 120(sp) # 8-byte Folded Spill -; RV64-BITS-512-NEXT: sd s0, 112(sp) # 8-byte Folded Spill -; RV64-BITS-512-NEXT: .cfi_offset ra, -8 -; RV64-BITS-512-NEXT: .cfi_offset s0, -16 -; RV64-BITS-512-NEXT: addi s0, sp, 128 -; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0 -; RV64-BITS-512-NEXT: andi sp, sp, -64 -; RV64-BITS-512-NEXT: li a0, 64 -; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vfirst.m a1, v0 -; RV64-BITS-512-NEXT: seqz a1, a1 -; RV64-BITS-512-NEXT: sb a1, 63(sp) ; RV64-BITS-512-NEXT: vsetivli zero, 0, e64, m1, ta, ma -; RV64-BITS-512-NEXT: vmv.x.s a1, v0 -; RV64-BITS-512-NEXT: srliw a2, a1, 31 -; RV64-BITS-512-NEXT: sb a2, 32(sp) -; RV64-BITS-512-NEXT: srli a2, a1, 63 -; RV64-BITS-512-NEXT: sb a2, 0(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 62 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 62(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 61 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 61(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 60 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 60(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 59 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 59(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 58 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 58(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 57 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 57(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 56 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 56(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 55 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 55(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 54 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 54(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 53 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 53(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 52 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 52(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 51 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 51(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 50 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 50(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 49 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 49(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 48 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 48(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 47 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 47(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 46 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 46(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 45 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 45(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 44 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 44(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 43 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 43(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 42 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 42(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 41 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 41(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 40 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 40(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 39 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 39(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 38 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 38(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 37 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 37(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 36 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 36(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 35 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 35(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 34 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 34(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 33 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 33(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 31 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 31(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 30 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 30(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 29 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 29(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 28 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 28(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 27 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 27(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 26 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 26(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 25 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 25(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 24 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 24(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 23 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 23(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 22 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 22(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 21 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 21(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 20 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 20(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 19 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 19(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 18 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 18(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 17 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 17(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 16 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 16(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 15 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 15(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 14 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 14(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 13 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 13(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 12 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 12(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 11 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 11(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 10 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 10(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 9 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 9(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 8 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 8(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 7 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 7(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 6 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 6(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 5 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 5(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 4 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 4(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 3 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 3(sp) -; RV64-BITS-512-NEXT: slli a2, a1, 2 -; RV64-BITS-512-NEXT: srli a2, a2, 63 -; RV64-BITS-512-NEXT: sb a2, 2(sp) -; RV64-BITS-512-NEXT: slli a1, a1, 1 -; RV64-BITS-512-NEXT: srli a1, a1, 63 -; RV64-BITS-512-NEXT: sb a1, 1(sp) -; RV64-BITS-512-NEXT: mv a1, sp -; RV64-BITS-512-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vle8.v v8, (a1) +; RV64-BITS-512-NEXT: vmv.x.s a0, v0 +; RV64-BITS-512-NEXT: srli a1, a0, 63 +; RV64-BITS-512-NEXT: li a2, 64 +; RV64-BITS-512-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 2 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 3 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 4 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 5 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 6 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 7 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 8 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 9 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 10 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 11 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 12 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 13 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 14 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 15 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 16 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 17 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 18 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 19 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 20 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 21 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 22 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 23 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 24 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 25 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 26 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 27 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 28 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 29 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 30 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 31 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: srliw a1, a0, 31 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 33 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 34 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 35 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 36 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 37 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 38 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 39 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 40 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 41 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 42 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 43 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 44 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 45 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 46 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 47 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 48 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 49 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 50 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 51 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 52 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 53 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 54 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 55 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 56 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 57 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 58 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 59 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 60 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a1, a0, 61 +; RV64-BITS-512-NEXT: srli a1, a1, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a1 +; RV64-BITS-512-NEXT: slli a0, a0, 62 +; RV64-BITS-512-NEXT: srli a0, a0, 63 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vfirst.m a0, v0 +; RV64-BITS-512-NEXT: seqz a0, a0 +; RV64-BITS-512-NEXT: vslide1down.vx v8, v8, a0 ; RV64-BITS-512-NEXT: vand.vi v8, v8, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 -; RV64-BITS-512-NEXT: addi sp, s0, -128 -; RV64-BITS-512-NEXT: ld ra, 120(sp) # 8-byte Folded Reload -; RV64-BITS-512-NEXT: ld s0, 112(sp) # 8-byte Folded Reload -; RV64-BITS-512-NEXT: addi sp, sp, 128 ; RV64-BITS-512-NEXT: ret %res = call <64 x i1> @llvm.experimental.vector.reverse.v64i1(<64 x i1> %a) ret <64 x i1> %res diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -310,11 +310,11 @@ ; ; RV32-LABEL: fp2si_v8f64_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vfmv.f.s fa3, v8 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v12, v8, 1 +; RV32-NEXT: vfmv.f.s fa3, v12 ; RV32-NEXT: lui a0, %hi(.LCPI12_0) ; RV32-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; RV32-NEXT: lui a0, %hi(.LCPI12_1) @@ -325,63 +325,73 @@ ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 8(sp) -; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma -; RV32-NEXT: vslidedown.vi v12, v8, 7 -; RV32-NEXT: vfmv.f.s fa3, v12 -; RV32-NEXT: feq.d a0, fa3, fa3 -; RV32-NEXT: neg a0, a0 +; RV32-NEXT: vfmv.f.s fa3, v8 +; RV32-NEXT: feq.d a2, fa3, fa3 +; RV32-NEXT: neg a2, a2 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 -; RV32-NEXT: fcvt.w.d a2, fa3, rtz -; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 6 -; RV32-NEXT: vfmv.f.s fa3, v12 +; RV32-NEXT: fcvt.w.d a3, fa3, rtz +; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v8, a2 +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vfmv.f.s fa3, v16 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 5 -; RV32-NEXT: vfmv.f.s fa3, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vfmv.f.s fa3, v16 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 4 -; RV32-NEXT: vfmv.f.s fa3, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vfmv.f.s fa3, v16 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 3 -; RV32-NEXT: vfmv.f.s fa3, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vfmv.f.s fa3, v16 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 2 -; RV32-NEXT: vfmv.f.s fa3, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vfmv.f.s fa3, v16 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 ; RV32-NEXT: fmax.d fa3, fa3, fa5 ; RV32-NEXT: fmin.d fa3, fa3, fa4 ; RV32-NEXT: fcvt.w.d a2, fa3, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa3, v8 ; RV32-NEXT: feq.d a0, fa3, fa3 ; RV32-NEXT: neg a0, a0 @@ -389,101 +399,105 @@ ; RV32-NEXT: fmin.d fa5, fa5, fa4 ; RV32-NEXT: fcvt.w.d a2, fa5, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: sb a0, 9(sp) -; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: vslide1down.vx v8, v12, a0 ; RV32-NEXT: vse8.v v8, (a1) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: fp2si_v8f64_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vfmv.f.s fa3, v8 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v12, v8, 1 +; RV64-NEXT: vfmv.f.s fa3, v12 ; RV64-NEXT: lui a0, %hi(.LCPI12_0) ; RV64-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; RV64-NEXT: lui a0, %hi(.LCPI12_1) ; RV64-NEXT: fld fa4, %lo(.LCPI12_1)(a0) ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 7 -; RV64-NEXT: vfmv.f.s fa3, v12 -; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: vfmv.f.s fa3, v8 +; RV64-NEXT: feq.d a2, fa3, fa3 +; RV64-NEXT: neg a2, a2 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 -; RV64-NEXT: fcvt.l.d a2, fa3, rtz -; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 6 -; RV64-NEXT: vfmv.f.s fa3, v12 +; RV64-NEXT: fcvt.l.d a3, fa3, rtz +; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v8, a2 +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 2 +; RV64-NEXT: vfmv.f.s fa3, v16 ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 5 -; RV64-NEXT: vfmv.f.s fa3, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 3 +; RV64-NEXT: vfmv.f.s fa3, v16 ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 4 -; RV64-NEXT: vfmv.f.s fa3, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 4 +; RV64-NEXT: vfmv.f.s fa3, v16 ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 3 -; RV64-NEXT: vfmv.f.s fa3, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 5 +; RV64-NEXT: vfmv.f.s fa3, v16 ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 2 -; RV64-NEXT: vfmv.f.s fa3, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 6 +; RV64-NEXT: vfmv.f.s fa3, v16 ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa3, fa3, fa5 ; RV64-NEXT: fmin.d fa3, fa3, fa4 ; RV64-NEXT: fcvt.l.d a2, fa3, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 7 ; RV64-NEXT: vfmv.f.s fa3, v8 ; RV64-NEXT: feq.d a0, fa3, fa3 -; RV64-NEXT: negw a0, a0 +; RV64-NEXT: neg a0, a0 ; RV64-NEXT: fmax.d fa5, fa3, fa5 ; RV64-NEXT: fmin.d fa5, fa5, fa4 ; RV64-NEXT: fcvt.l.d a2, fa5, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: vslide1down.vx v8, v12, a0 ; RV64-NEXT: vse8.v v8, (a1) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %a = load <8 x double>, ptr %x %d = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> %a) @@ -496,8 +510,6 @@ ; ; RV32-LABEL: fp2ui_v8f64_v8i8: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lui a0, %hi(.LCPI13_0) @@ -507,61 +519,69 @@ ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 8(sp) +; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v8, a0 ; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma -; RV32-NEXT: vslidedown.vi v12, v8, 7 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vslidedown.vi v16, v8, 1 +; RV32-NEXT: vfmv.f.s fa4, v16 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 6 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 2 +; RV32-NEXT: vfmv.f.s fa4, v16 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 5 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 3 +; RV32-NEXT: vfmv.f.s fa4, v16 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 4 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 4 +; RV32-NEXT: vfmv.f.s fa4, v16 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 12(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 3 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 5 +; RV32-NEXT: vfmv.f.s fa4, v16 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: vslidedown.vi v12, v8, 2 -; RV32-NEXT: vfmv.f.s fa4, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 6 +; RV32-NEXT: vfmv.f.s fa4, v16 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa4, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa4, rtz -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vslide1down.vx v12, v12, a0 +; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 7 ; RV32-NEXT: vfmv.f.s fa4, v8 ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa5, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa5, rtz -; RV32-NEXT: sb a0, 9(sp) -; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: vslide1down.vx v8, v12, a0 ; RV32-NEXT: vse8.v v8, (a1) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: fp2ui_v8f64_v8i8: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a0, %hi(.LCPI13_0) @@ -571,55 +591,65 @@ ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 8(sp) +; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v8, a0 ; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma -; RV64-NEXT: vslidedown.vi v12, v8, 7 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vslidedown.vi v16, v8, 1 +; RV64-NEXT: vfmv.f.s fa4, v16 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 6 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 2 +; RV64-NEXT: vfmv.f.s fa4, v16 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 5 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 3 +; RV64-NEXT: vfmv.f.s fa4, v16 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 4 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 4 +; RV64-NEXT: vfmv.f.s fa4, v16 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 12(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 3 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 5 +; RV64-NEXT: vfmv.f.s fa4, v16 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 2 -; RV64-NEXT: vfmv.f.s fa4, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 6 +; RV64-NEXT: vfmv.f.s fa4, v16 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa4, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa4, rtz -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vslide1down.vx v12, v12, a0 +; RV64-NEXT: vsetivli zero, 1, e64, m4, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 7 ; RV64-NEXT: vfmv.f.s fa4, v8 ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa5, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa5, rtz -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: vslide1down.vx v8, v12, a0 ; RV64-NEXT: vse8.v v8, (a1) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %a = load <8 x double>, ptr %x %d = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f64(<8 x double> %a) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll @@ -216,86 +216,70 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) { ; LMULMAX8RV32-LABEL: si2fp_v3i7_v3f32: ; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: addi sp, sp, -16 -; LMULMAX8RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8RV32-NEXT: lw a1, 8(a0) -; LMULMAX8RV32-NEXT: sb a1, 14(sp) -; LMULMAX8RV32-NEXT: lw a1, 4(a0) -; LMULMAX8RV32-NEXT: sb a1, 13(sp) -; LMULMAX8RV32-NEXT: lw a0, 0(a0) -; LMULMAX8RV32-NEXT: sb a0, 12(sp) -; LMULMAX8RV32-NEXT: addi a0, sp, 12 +; LMULMAX8RV32-NEXT: lw a1, 0(a0) ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV32-NEXT: vle8.v v8, (a0) +; LMULMAX8RV32-NEXT: lw a2, 4(a0) +; LMULMAX8RV32-NEXT: lw a0, 8(a0) +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX8RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX8RV32-NEXT: vsra.vi v8, v8, 1 ; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vsext.vf2 v9, v8 ; LMULMAX8RV32-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8RV32-NEXT: addi sp, sp, 16 ; LMULMAX8RV32-NEXT: ret ; ; LMULMAX8RV64-LABEL: si2fp_v3i7_v3f32: ; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: addi sp, sp, -16 -; LMULMAX8RV64-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8RV64-NEXT: ld a1, 16(a0) -; LMULMAX8RV64-NEXT: sb a1, 14(sp) -; LMULMAX8RV64-NEXT: ld a1, 8(a0) -; LMULMAX8RV64-NEXT: sb a1, 13(sp) -; LMULMAX8RV64-NEXT: ld a0, 0(a0) -; LMULMAX8RV64-NEXT: sb a0, 12(sp) -; LMULMAX8RV64-NEXT: addi a0, sp, 12 +; LMULMAX8RV64-NEXT: ld a1, 0(a0) ; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV64-NEXT: vle8.v v8, (a0) +; LMULMAX8RV64-NEXT: ld a2, 8(a0) +; LMULMAX8RV64-NEXT: ld a0, 16(a0) +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX8RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX8RV64-NEXT: vsra.vi v8, v8, 1 ; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vsext.vf2 v9, v8 ; LMULMAX8RV64-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX8RV64-NEXT: addi sp, sp, 16 ; LMULMAX8RV64-NEXT: ret ; ; LMULMAX1RV32-LABEL: si2fp_v3i7_v3f32: ; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: addi sp, sp, -16 -; LMULMAX1RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1RV32-NEXT: lw a1, 8(a0) -; LMULMAX1RV32-NEXT: sb a1, 14(sp) -; LMULMAX1RV32-NEXT: lw a1, 4(a0) -; LMULMAX1RV32-NEXT: sb a1, 13(sp) -; LMULMAX1RV32-NEXT: lw a0, 0(a0) -; LMULMAX1RV32-NEXT: sb a0, 12(sp) -; LMULMAX1RV32-NEXT: addi a0, sp, 12 +; LMULMAX1RV32-NEXT: lw a1, 0(a0) ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV32-NEXT: vle8.v v8, (a0) +; LMULMAX1RV32-NEXT: lw a2, 4(a0) +; LMULMAX1RV32-NEXT: lw a0, 8(a0) +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX1RV32-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1RV32-NEXT: vsra.vi v8, v8, 1 ; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vsext.vf2 v9, v8 ; LMULMAX1RV32-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1RV32-NEXT: addi sp, sp, 16 ; LMULMAX1RV32-NEXT: ret ; ; LMULMAX1RV64-LABEL: si2fp_v3i7_v3f32: ; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: addi sp, sp, -16 -; LMULMAX1RV64-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1RV64-NEXT: ld a1, 16(a0) -; LMULMAX1RV64-NEXT: sb a1, 14(sp) -; LMULMAX1RV64-NEXT: ld a1, 8(a0) -; LMULMAX1RV64-NEXT: sb a1, 13(sp) -; LMULMAX1RV64-NEXT: ld a0, 0(a0) -; LMULMAX1RV64-NEXT: sb a0, 12(sp) -; LMULMAX1RV64-NEXT: addi a0, sp, 12 +; LMULMAX1RV64-NEXT: ld a1, 0(a0) ; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV64-NEXT: vle8.v v8, (a0) +; LMULMAX1RV64-NEXT: ld a2, 8(a0) +; LMULMAX1RV64-NEXT: ld a0, 16(a0) +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX1RV64-NEXT: vadd.vv v8, v8, v8 ; LMULMAX1RV64-NEXT: vsra.vi v8, v8, 1 ; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vsext.vf2 v9, v8 ; LMULMAX1RV64-NEXT: vfwcvt.f.x.v v8, v9 -; LMULMAX1RV64-NEXT: addi sp, sp, 16 ; LMULMAX1RV64-NEXT: ret %z = sitofp <3 x i7> %x to <3 x float> ret <3 x float> %z @@ -305,86 +289,70 @@ define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) { ; LMULMAX8RV32-LABEL: ui2fp_v3i7_v3f32: ; LMULMAX8RV32: # %bb.0: -; LMULMAX8RV32-NEXT: addi sp, sp, -16 -; LMULMAX8RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8RV32-NEXT: lw a1, 8(a0) -; LMULMAX8RV32-NEXT: sb a1, 14(sp) -; LMULMAX8RV32-NEXT: lw a1, 4(a0) -; LMULMAX8RV32-NEXT: sb a1, 13(sp) -; LMULMAX8RV32-NEXT: lw a0, 0(a0) -; LMULMAX8RV32-NEXT: sb a0, 12(sp) -; LMULMAX8RV32-NEXT: addi a0, sp, 12 +; LMULMAX8RV32-NEXT: lw a1, 0(a0) ; LMULMAX8RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV32-NEXT: vle8.v v8, (a0) +; LMULMAX8RV32-NEXT: lw a2, 4(a0) +; LMULMAX8RV32-NEXT: lw a0, 8(a0) +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX8RV32-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX8RV32-NEXT: li a0, 127 ; LMULMAX8RV32-NEXT: vand.vx v8, v8, a0 ; LMULMAX8RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX8RV32-NEXT: vzext.vf2 v9, v8 ; LMULMAX8RV32-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8RV32-NEXT: addi sp, sp, 16 ; LMULMAX8RV32-NEXT: ret ; ; LMULMAX8RV64-LABEL: ui2fp_v3i7_v3f32: ; LMULMAX8RV64: # %bb.0: -; LMULMAX8RV64-NEXT: addi sp, sp, -16 -; LMULMAX8RV64-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8RV64-NEXT: ld a1, 16(a0) -; LMULMAX8RV64-NEXT: sb a1, 14(sp) -; LMULMAX8RV64-NEXT: ld a1, 8(a0) -; LMULMAX8RV64-NEXT: sb a1, 13(sp) -; LMULMAX8RV64-NEXT: ld a0, 0(a0) -; LMULMAX8RV64-NEXT: sb a0, 12(sp) -; LMULMAX8RV64-NEXT: addi a0, sp, 12 +; LMULMAX8RV64-NEXT: ld a1, 0(a0) ; LMULMAX8RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX8RV64-NEXT: vle8.v v8, (a0) +; LMULMAX8RV64-NEXT: ld a2, 8(a0) +; LMULMAX8RV64-NEXT: ld a0, 16(a0) +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX8RV64-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX8RV64-NEXT: li a0, 127 ; LMULMAX8RV64-NEXT: vand.vx v8, v8, a0 ; LMULMAX8RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX8RV64-NEXT: vzext.vf2 v9, v8 ; LMULMAX8RV64-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX8RV64-NEXT: addi sp, sp, 16 ; LMULMAX8RV64-NEXT: ret ; ; LMULMAX1RV32-LABEL: ui2fp_v3i7_v3f32: ; LMULMAX1RV32: # %bb.0: -; LMULMAX1RV32-NEXT: addi sp, sp, -16 -; LMULMAX1RV32-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1RV32-NEXT: lw a1, 8(a0) -; LMULMAX1RV32-NEXT: sb a1, 14(sp) -; LMULMAX1RV32-NEXT: lw a1, 4(a0) -; LMULMAX1RV32-NEXT: sb a1, 13(sp) -; LMULMAX1RV32-NEXT: lw a0, 0(a0) -; LMULMAX1RV32-NEXT: sb a0, 12(sp) -; LMULMAX1RV32-NEXT: addi a0, sp, 12 +; LMULMAX1RV32-NEXT: lw a1, 0(a0) ; LMULMAX1RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV32-NEXT: vle8.v v8, (a0) +; LMULMAX1RV32-NEXT: lw a2, 4(a0) +; LMULMAX1RV32-NEXT: lw a0, 8(a0) +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX1RV32-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX1RV32-NEXT: li a0, 127 ; LMULMAX1RV32-NEXT: vand.vx v8, v8, a0 ; LMULMAX1RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX1RV32-NEXT: vzext.vf2 v9, v8 ; LMULMAX1RV32-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1RV32-NEXT: addi sp, sp, 16 ; LMULMAX1RV32-NEXT: ret ; ; LMULMAX1RV64-LABEL: ui2fp_v3i7_v3f32: ; LMULMAX1RV64: # %bb.0: -; LMULMAX1RV64-NEXT: addi sp, sp, -16 -; LMULMAX1RV64-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX1RV64-NEXT: ld a1, 16(a0) -; LMULMAX1RV64-NEXT: sb a1, 14(sp) -; LMULMAX1RV64-NEXT: ld a1, 8(a0) -; LMULMAX1RV64-NEXT: sb a1, 13(sp) -; LMULMAX1RV64-NEXT: ld a0, 0(a0) -; LMULMAX1RV64-NEXT: sb a0, 12(sp) -; LMULMAX1RV64-NEXT: addi a0, sp, 12 +; LMULMAX1RV64-NEXT: ld a1, 0(a0) ; LMULMAX1RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; LMULMAX1RV64-NEXT: vle8.v v8, (a0) +; LMULMAX1RV64-NEXT: ld a2, 8(a0) +; LMULMAX1RV64-NEXT: ld a0, 16(a0) +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a1 +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a2 +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0 +; LMULMAX1RV64-NEXT: vslide1down.vx v8, v8, a0 ; LMULMAX1RV64-NEXT: li a0, 127 ; LMULMAX1RV64-NEXT: vand.vx v8, v8, a0 ; LMULMAX1RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; LMULMAX1RV64-NEXT: vzext.vf2 v9, v8 ; LMULMAX1RV64-NEXT: vfwcvt.f.xu.v v8, v9 -; LMULMAX1RV64-NEXT: addi sp, sp, 16 ; LMULMAX1RV64-NEXT: ret %z = uitofp <3 x i7> %x to <3 x float> ret <3 x float> %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll @@ -21,8 +21,6 @@ define <5 x i8> @load_v5i8_align1(ptr %p) { ; RV32-LABEL: load_v5i8_align1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: lbu a1, 1(a0) ; RV32-NEXT: lbu a2, 0(a0) ; RV32-NEXT: lbu a3, 2(a0) @@ -35,29 +33,28 @@ ; RV32-NEXT: or a1, a3, a1 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vslidedown.vi v9, v8, 3 +; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vslidedown.vi v9, v8, 2 +; RV32-NEXT: vmv.x.s a2, v9 ; RV32-NEXT: vslidedown.vi v9, v8, 1 -; RV32-NEXT: vslidedown.vi v10, v8, 2 -; RV32-NEXT: vslidedown.vi v11, v8, 3 +; RV32-NEXT: vmv.x.s a3, v9 +; RV32-NEXT: vmv.x.s a4, v8 ; RV32-NEXT: lb a0, 4(a0) -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; RV32-NEXT: vse8.v v8, (a1) -; RV32-NEXT: addi a2, sp, 11 -; RV32-NEXT: vse8.v v11, (a2) -; RV32-NEXT: addi a2, sp, 10 -; RV32-NEXT: vse8.v v10, (a2) -; RV32-NEXT: addi a2, sp, 9 -; RV32-NEXT: vse8.v v9, (a2) -; RV32-NEXT: sb a0, 12(sp) ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: vslide1down.vx v8, v8, a4 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: load_v5i8_align1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: lbu a1, 1(a0) ; RV64-NEXT: lbu a2, 0(a0) ; RV64-NEXT: lbu a3, 2(a0) @@ -70,23 +67,24 @@ ; RV64-NEXT: or a1, a3, a1 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vslidedown.vi v9, v8, 3 +; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma +; RV64-NEXT: vmv.x.s a1, v9 +; RV64-NEXT: vslidedown.vi v9, v8, 2 +; RV64-NEXT: vmv.x.s a2, v9 ; RV64-NEXT: vslidedown.vi v9, v8, 1 -; RV64-NEXT: vslidedown.vi v10, v8, 2 -; RV64-NEXT: vslidedown.vi v11, v8, 3 +; RV64-NEXT: vmv.x.s a3, v9 +; RV64-NEXT: vmv.x.s a4, v8 ; RV64-NEXT: lb a0, 4(a0) -; RV64-NEXT: addi a1, sp, 8 -; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma -; RV64-NEXT: vse8.v v8, (a1) -; RV64-NEXT: addi a2, sp, 11 -; RV64-NEXT: vse8.v v11, (a2) -; RV64-NEXT: addi a2, sp, 10 -; RV64-NEXT: vse8.v v10, (a2) -; RV64-NEXT: addi a2, sp, 9 -; RV64-NEXT: vse8.v v9, (a2) -; RV64-NEXT: sb a0, 12(sp) ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vslide1down.vx v8, v8, a4 +; RV64-NEXT: vslide1down.vx v8, v8, a3 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: ret %x = load <5 x i8>, ptr %p, align 1 ret <5 x i8> %x @@ -180,60 +178,54 @@ define <6 x i1> @load_v6i1(ptr %p) { ; RV32-LABEL: load_v6i1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: lbu a0, 0(a0) -; RV32-NEXT: slli a1, a0, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: slli a2, a0, 29 +; RV32-NEXT: srli a1, a0, 5 +; RV32-NEXT: slli a2, a0, 27 ; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: slli a3, a0, 28 ; RV32-NEXT: srli a3, a3, 31 -; RV32-NEXT: slli a4, a0, 27 +; RV32-NEXT: slli a4, a0, 29 ; RV32-NEXT: srli a4, a4, 31 -; RV32-NEXT: andi a5, a0, 1 -; RV32-NEXT: srli a0, a0, 5 -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: sb a5, 8(sp) -; RV32-NEXT: sb a4, 12(sp) -; RV32-NEXT: sb a3, 11(sp) -; RV32-NEXT: sb a2, 10(sp) -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: slli a5, a0, 30 +; RV32-NEXT: srli a5, a5, 31 +; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vle8.v v8, (a0) +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a5 +; RV32-NEXT: vslide1down.vx v8, v8, a4 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vand.vi v8, v8, 1 ; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: load_v6i1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: lbu a0, 0(a0) -; RV64-NEXT: slli a1, a0, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: slli a2, a0, 61 +; RV64-NEXT: srli a1, a0, 5 +; RV64-NEXT: slli a2, a0, 59 ; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: slli a3, a0, 60 ; RV64-NEXT: srli a3, a3, 63 -; RV64-NEXT: slli a4, a0, 59 +; RV64-NEXT: slli a4, a0, 61 ; RV64-NEXT: srli a4, a4, 63 -; RV64-NEXT: andi a5, a0, 1 -; RV64-NEXT: srli a0, a0, 5 -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: sb a5, 8(sp) -; RV64-NEXT: sb a4, 12(sp) -; RV64-NEXT: sb a3, 11(sp) -; RV64-NEXT: sb a2, 10(sp) -; RV64-NEXT: sb a1, 9(sp) -; RV64-NEXT: addi a0, sp, 8 +; RV64-NEXT: slli a5, a0, 62 +; RV64-NEXT: srli a5, a5, 63 +; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vle8.v v8, (a0) +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a5 +; RV64-NEXT: vslide1down.vx v8, v8, a4 +; RV64-NEXT: vslide1down.vx v8, v8, a3 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: vand.vi v8, v8, 1 ; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %x = load <6 x i1>, ptr %p ret <6 x i1> %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -81,30 +81,20 @@ define <2 x i1> @buildvec_mask_optsize_nonconst_v2i1(i1 %x, i1 %y) optsize { ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sb a1, 15(sp) -; CHECK-NEXT: sb a0, 14(sp) -; CHECK-NEXT: addi a0, sp, 14 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v2i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: addi sp, sp, -16 -; ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; ZVE32F-NEXT: sb a1, 15(sp) -; ZVE32F-NEXT: sb a0, 14(sp) -; ZVE32F-NEXT: addi a0, sp, 14 ; ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 -; ZVE32F-NEXT: addi sp, sp, 16 ; ZVE32F-NEXT: ret %1 = insertelement <2 x i1> poison, i1 %x, i32 0 %2 = insertelement <2 x i1> %1, i1 %y, i32 1 @@ -195,34 +185,24 @@ define <4 x i1> @buildvec_mask_optsize_nonconst_v4i1(i1 %x, i1 %y) optsize { ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sb a1, 15(sp) -; CHECK-NEXT: sb a1, 14(sp) -; CHECK-NEXT: sb a0, 13(sp) -; CHECK-NEXT: sb a0, 12(sp) -; CHECK-NEXT: addi a0, sp, 12 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v4i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: addi sp, sp, -16 -; ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; ZVE32F-NEXT: sb a1, 15(sp) -; ZVE32F-NEXT: sb a1, 14(sp) -; ZVE32F-NEXT: sb a0, 13(sp) -; ZVE32F-NEXT: sb a0, 12(sp) -; ZVE32F-NEXT: addi a0, sp, 12 ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 -; ZVE32F-NEXT: addi sp, sp, 16 ; ZVE32F-NEXT: ret %1 = insertelement <4 x i1> poison, i1 %x, i32 0 %2 = insertelement <4 x i1> %1, i1 %x, i32 1 @@ -234,36 +214,26 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1_2(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v4i1_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sb a1, 15(sp) -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: sb a1, 14(sp) -; CHECK-NEXT: sb a0, 13(sp) -; CHECK-NEXT: sb zero, 12(sp) -; CHECK-NEXT: addi a0, sp, 12 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, zero +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1_2: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: addi sp, sp, -16 -; ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; ZVE32F-NEXT: sb a1, 15(sp) -; ZVE32F-NEXT: li a1, 1 -; ZVE32F-NEXT: sb a1, 14(sp) -; ZVE32F-NEXT: sb a0, 13(sp) -; ZVE32F-NEXT: sb zero, 12(sp) -; ZVE32F-NEXT: addi a0, sp, 12 ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vslide1down.vx v8, v8, zero +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: li a0, 1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 -; ZVE32F-NEXT: addi sp, sp, 16 ; ZVE32F-NEXT: ret %1 = insertelement <4 x i1> poison, i1 0, i32 0 %2 = insertelement <4 x i1> %1, i1 %x, i32 1 @@ -325,44 +295,34 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sb a2, 15(sp) -; CHECK-NEXT: sb zero, 14(sp) -; CHECK-NEXT: sb a3, 13(sp) -; CHECK-NEXT: sb a0, 12(sp) -; CHECK-NEXT: sb a1, 11(sp) -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: sb a1, 10(sp) -; CHECK-NEXT: sb a0, 9(sp) -; CHECK-NEXT: sb a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: li a4, 1 +; CHECK-NEXT: vslide1down.vx v8, v8, a4 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vslide1down.vx v8, v8, zero +; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1_2: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: addi sp, sp, -16 -; ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; ZVE32F-NEXT: sb a2, 15(sp) -; ZVE32F-NEXT: sb zero, 14(sp) -; ZVE32F-NEXT: sb a3, 13(sp) -; ZVE32F-NEXT: sb a0, 12(sp) -; ZVE32F-NEXT: sb a1, 11(sp) -; ZVE32F-NEXT: li a1, 1 -; ZVE32F-NEXT: sb a1, 10(sp) -; ZVE32F-NEXT: sb a0, 9(sp) -; ZVE32F-NEXT: sb a0, 8(sp) -; ZVE32F-NEXT: addi a0, sp, 8 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: li a4, 1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; ZVE32F-NEXT: vslide1down.vx v8, v8, zero +; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 -; ZVE32F-NEXT: addi sp, sp, 16 ; ZVE32F-NEXT: ret %1 = insertelement <8 x i1> poison, i1 %x, i32 0 %2 = insertelement <8 x i1> %1, i1 %x, i32 1 @@ -378,44 +338,34 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) optsize { ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1_2: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sb a2, 15(sp) -; CHECK-NEXT: sb zero, 14(sp) -; CHECK-NEXT: sb a3, 13(sp) -; CHECK-NEXT: sb a0, 12(sp) -; CHECK-NEXT: sb a1, 11(sp) -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: sb a1, 10(sp) -; CHECK-NEXT: sb a0, 9(sp) -; CHECK-NEXT: sb a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: li a4, 1 +; CHECK-NEXT: vslide1down.vx v8, v8, a4 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vslide1down.vx v8, v8, zero +; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1_2: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: addi sp, sp, -16 -; ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; ZVE32F-NEXT: sb a2, 15(sp) -; ZVE32F-NEXT: sb zero, 14(sp) -; ZVE32F-NEXT: sb a3, 13(sp) -; ZVE32F-NEXT: sb a0, 12(sp) -; ZVE32F-NEXT: sb a1, 11(sp) -; ZVE32F-NEXT: li a1, 1 -; ZVE32F-NEXT: sb a1, 10(sp) -; ZVE32F-NEXT: sb a0, 9(sp) -; ZVE32F-NEXT: sb a0, 8(sp) -; ZVE32F-NEXT: addi a0, sp, 8 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: li a4, 1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; ZVE32F-NEXT: vslide1down.vx v8, v8, zero +; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 -; ZVE32F-NEXT: addi sp, sp, 16 ; ZVE32F-NEXT: ret %1 = insertelement <8 x i1> poison, i1 %x, i32 0 %2 = insertelement <8 x i1> %1, i1 %x, i32 1 @@ -431,42 +381,32 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; CHECK-LABEL: buildvec_mask_optsize_nonconst_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: sb a1, 15(sp) -; CHECK-NEXT: sb a1, 14(sp) -; CHECK-NEXT: sb a1, 13(sp) -; CHECK-NEXT: sb a0, 12(sp) -; CHECK-NEXT: sb a1, 11(sp) -; CHECK-NEXT: sb a1, 10(sp) -; CHECK-NEXT: sb a0, 9(sp) -; CHECK-NEXT: sb a0, 8(sp) -; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a0 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_optsize_nonconst_v8i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: addi sp, sp, -16 -; ZVE32F-NEXT: .cfi_def_cfa_offset 16 -; ZVE32F-NEXT: sb a1, 15(sp) -; ZVE32F-NEXT: sb a1, 14(sp) -; ZVE32F-NEXT: sb a1, 13(sp) -; ZVE32F-NEXT: sb a0, 12(sp) -; ZVE32F-NEXT: sb a1, 11(sp) -; ZVE32F-NEXT: sb a1, 10(sp) -; ZVE32F-NEXT: sb a0, 9(sp) -; ZVE32F-NEXT: sb a0, 8(sp) -; ZVE32F-NEXT: addi a0, sp, 8 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: vle8.v v8, (a0) +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 -; ZVE32F-NEXT: addi sp, sp, 16 ; ZVE32F-NEXT: ret %1 = insertelement <8 x i1> poison, i1 %x, i32 0 %2 = insertelement <8 x i1> %1, i1 %x, i32 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -6775,191 +6775,156 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -96 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 96 -; RV32ZVE32F-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s2, 84(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s3, 80(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s4, 76(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s5, 72(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s6, 68(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s7, 64(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s8, 60(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s9, 56(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s10, 52(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s11, 48(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset ra, -4 -; RV32ZVE32F-NEXT: .cfi_offset s0, -8 -; RV32ZVE32F-NEXT: .cfi_offset s2, -12 -; RV32ZVE32F-NEXT: .cfi_offset s3, -16 -; RV32ZVE32F-NEXT: .cfi_offset s4, -20 -; RV32ZVE32F-NEXT: .cfi_offset s5, -24 -; RV32ZVE32F-NEXT: .cfi_offset s6, -28 -; RV32ZVE32F-NEXT: .cfi_offset s7, -32 -; RV32ZVE32F-NEXT: .cfi_offset s8, -36 -; RV32ZVE32F-NEXT: .cfi_offset s9, -40 -; RV32ZVE32F-NEXT: .cfi_offset s10, -44 -; RV32ZVE32F-NEXT: .cfi_offset s11, -48 -; RV32ZVE32F-NEXT: addi s0, sp, 96 -; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 -; RV32ZVE32F-NEXT: andi sp, sp, -32 -; RV32ZVE32F-NEXT: lw a4, 60(a3) -; RV32ZVE32F-NEXT: lw a5, 56(a3) -; RV32ZVE32F-NEXT: lw a6, 52(a3) -; RV32ZVE32F-NEXT: lw a7, 48(a3) -; RV32ZVE32F-NEXT: lw t0, 44(a3) -; RV32ZVE32F-NEXT: lw t1, 40(a3) -; RV32ZVE32F-NEXT: lw t2, 36(a3) -; RV32ZVE32F-NEXT: lw t3, 32(a3) -; RV32ZVE32F-NEXT: lw t4, 28(a3) -; RV32ZVE32F-NEXT: lw t5, 24(a3) -; RV32ZVE32F-NEXT: lw t6, 20(a3) -; RV32ZVE32F-NEXT: lw s2, 16(a3) -; RV32ZVE32F-NEXT: lw s3, 12(a3) -; RV32ZVE32F-NEXT: lw s5, 8(a3) -; RV32ZVE32F-NEXT: lw s4, 4(a3) -; RV32ZVE32F-NEXT: lw a3, 0(a3) -; RV32ZVE32F-NEXT: lw s6, 0(a2) -; RV32ZVE32F-NEXT: lw s7, 8(a2) -; RV32ZVE32F-NEXT: lw s8, 16(a2) -; RV32ZVE32F-NEXT: lw s9, 24(a2) -; RV32ZVE32F-NEXT: lw s10, 56(a2) -; RV32ZVE32F-NEXT: lw s11, 48(a2) -; RV32ZVE32F-NEXT: lw ra, 40(a2) -; RV32ZVE32F-NEXT: lw a2, 32(a2) -; RV32ZVE32F-NEXT: sw s10, 28(sp) -; RV32ZVE32F-NEXT: sw s11, 24(sp) -; RV32ZVE32F-NEXT: sw ra, 20(sp) -; RV32ZVE32F-NEXT: sw a2, 16(sp) -; RV32ZVE32F-NEXT: sw s9, 12(sp) -; RV32ZVE32F-NEXT: sw s8, 8(sp) -; RV32ZVE32F-NEXT: sw s7, 4(sp) -; RV32ZVE32F-NEXT: sw s6, 0(sp) -; RV32ZVE32F-NEXT: mv a2, sp +; RV32ZVE32F-NEXT: addi sp, sp, -16 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16 +; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 +; RV32ZVE32F-NEXT: lw a4, 56(a2) +; RV32ZVE32F-NEXT: lw a5, 48(a2) +; RV32ZVE32F-NEXT: lw a6, 40(a2) +; RV32ZVE32F-NEXT: lw a7, 32(a2) +; RV32ZVE32F-NEXT: lw t0, 24(a2) +; RV32ZVE32F-NEXT: lw t1, 0(a2) +; RV32ZVE32F-NEXT: lw t2, 8(a2) +; RV32ZVE32F-NEXT: lw a2, 16(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vle32.v v8, (a2) +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_10 -; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_11 -; RV32ZVE32F-NEXT: .LBB57_2: # %else2 -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_12 -; RV32ZVE32F-NEXT: .LBB57_3: # %else5 -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_13 -; RV32ZVE32F-NEXT: .LBB57_4: # %else8 -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_14 -; RV32ZVE32F-NEXT: .LBB57_5: # %else11 -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_15 -; RV32ZVE32F-NEXT: .LBB57_6: # %else14 -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: bnez a2, .LBB57_16 -; RV32ZVE32F-NEXT: .LBB57_7: # %else17 -; RV32ZVE32F-NEXT: andi a1, a1, -128 +; RV32ZVE32F-NEXT: vmv.x.s t0, v0 +; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB57_9 -; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load19 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: lw a4, 4(a1) -; RV32ZVE32F-NEXT: lw a5, 0(a1) -; RV32ZVE32F-NEXT: .LBB57_9: # %else20 -; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw s4, 4(a0) -; RV32ZVE32F-NEXT: sw s5, 8(a0) -; RV32ZVE32F-NEXT: sw s3, 12(a0) -; RV32ZVE32F-NEXT: sw s2, 16(a0) -; RV32ZVE32F-NEXT: sw t6, 20(a0) -; RV32ZVE32F-NEXT: sw t5, 24(a0) -; RV32ZVE32F-NEXT: sw t4, 28(a0) -; RV32ZVE32F-NEXT: sw t3, 32(a0) -; RV32ZVE32F-NEXT: sw t2, 36(a0) -; RV32ZVE32F-NEXT: sw t1, 40(a0) -; RV32ZVE32F-NEXT: sw t0, 44(a0) -; RV32ZVE32F-NEXT: sw a7, 48(a0) -; RV32ZVE32F-NEXT: sw a6, 52(a0) -; RV32ZVE32F-NEXT: sw a5, 56(a0) -; RV32ZVE32F-NEXT: sw a4, 60(a0) -; RV32ZVE32F-NEXT: addi sp, s0, -96 -; RV32ZVE32F-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s2, 84(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s3, 80(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s4, 76(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s5, 72(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s6, 68(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s7, 64(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s8, 60(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s9, 56(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s10, 52(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s11, 48(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: addi sp, sp, 96 -; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load +; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw s4, 4(a2) -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 2 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_2 -; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load1 +; RV32ZVE32F-NEXT: lw a1, 4(a2) +; RV32ZVE32F-NEXT: lw a2, 0(a2) +; RV32ZVE32F-NEXT: andi a4, t0, 2 +; RV32ZVE32F-NEXT: bnez a4, .LBB57_10 +; RV32ZVE32F-NEXT: .LBB57_2: +; RV32ZVE32F-NEXT: lw a4, 12(a3) +; RV32ZVE32F-NEXT: lw a5, 8(a3) +; RV32ZVE32F-NEXT: andi a6, t0, 4 +; RV32ZVE32F-NEXT: bnez a6, .LBB57_11 +; RV32ZVE32F-NEXT: .LBB57_3: +; RV32ZVE32F-NEXT: lw a6, 20(a3) +; RV32ZVE32F-NEXT: lw a7, 16(a3) +; RV32ZVE32F-NEXT: andi t1, t0, 8 +; RV32ZVE32F-NEXT: bnez t1, .LBB57_12 +; RV32ZVE32F-NEXT: .LBB57_4: +; RV32ZVE32F-NEXT: lw t1, 28(a3) +; RV32ZVE32F-NEXT: lw t2, 24(a3) +; RV32ZVE32F-NEXT: andi t3, t0, 16 +; RV32ZVE32F-NEXT: bnez t3, .LBB57_13 +; RV32ZVE32F-NEXT: .LBB57_5: +; RV32ZVE32F-NEXT: lw t3, 36(a3) +; RV32ZVE32F-NEXT: lw t4, 32(a3) +; RV32ZVE32F-NEXT: andi t5, t0, 32 +; RV32ZVE32F-NEXT: bnez t5, .LBB57_14 +; RV32ZVE32F-NEXT: .LBB57_6: +; RV32ZVE32F-NEXT: lw t5, 44(a3) +; RV32ZVE32F-NEXT: lw t6, 40(a3) +; RV32ZVE32F-NEXT: andi s0, t0, 64 +; RV32ZVE32F-NEXT: bnez s0, .LBB57_15 +; RV32ZVE32F-NEXT: .LBB57_7: +; RV32ZVE32F-NEXT: lw s0, 52(a3) +; RV32ZVE32F-NEXT: lw s1, 48(a3) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: bnez t0, .LBB57_16 +; RV32ZVE32F-NEXT: .LBB57_8: +; RV32ZVE32F-NEXT: lw t0, 60(a3) +; RV32ZVE32F-NEXT: lw a3, 56(a3) +; RV32ZVE32F-NEXT: j .LBB57_17 +; RV32ZVE32F-NEXT: .LBB57_9: +; RV32ZVE32F-NEXT: lw a1, 4(a3) +; RV32ZVE32F-NEXT: lw a2, 0(a3) +; RV32ZVE32F-NEXT: andi a4, t0, 2 +; RV32ZVE32F-NEXT: beqz a4, .LBB57_2 +; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: lw s3, 4(a2) -; RV32ZVE32F-NEXT: lw s5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 4 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_3 -; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load4 +; RV32ZVE32F-NEXT: vmv.x.s a5, v10 +; RV32ZVE32F-NEXT: lw a4, 4(a5) +; RV32ZVE32F-NEXT: lw a5, 0(a5) +; RV32ZVE32F-NEXT: andi a6, t0, 4 +; RV32ZVE32F-NEXT: beqz a6, .LBB57_3 +; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load4 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: lw t6, 4(a2) -; RV32ZVE32F-NEXT: lw s2, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 8 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_4 -; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load7 +; RV32ZVE32F-NEXT: vmv.x.s a7, v10 +; RV32ZVE32F-NEXT: lw a6, 4(a7) +; RV32ZVE32F-NEXT: lw a7, 0(a7) +; RV32ZVE32F-NEXT: andi t1, t0, 8 +; RV32ZVE32F-NEXT: beqz t1, .LBB57_4 +; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: lw t4, 4(a2) -; RV32ZVE32F-NEXT: lw t5, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 16 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_5 -; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load10 +; RV32ZVE32F-NEXT: vmv.x.s t2, v10 +; RV32ZVE32F-NEXT: lw t1, 4(t2) +; RV32ZVE32F-NEXT: lw t2, 0(t2) +; RV32ZVE32F-NEXT: andi t3, t0, 16 +; RV32ZVE32F-NEXT: beqz t3, .LBB57_5 +; RV32ZVE32F-NEXT: .LBB57_13: # %cond.load10 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: lw t2, 4(a2) -; RV32ZVE32F-NEXT: lw t3, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 32 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_6 -; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load13 +; RV32ZVE32F-NEXT: vmv.x.s t4, v10 +; RV32ZVE32F-NEXT: lw t3, 4(t4) +; RV32ZVE32F-NEXT: lw t4, 0(t4) +; RV32ZVE32F-NEXT: andi t5, t0, 32 +; RV32ZVE32F-NEXT: beqz t5, .LBB57_6 +; RV32ZVE32F-NEXT: .LBB57_14: # %cond.load13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: lw t0, 4(a2) -; RV32ZVE32F-NEXT: lw t1, 0(a2) -; RV32ZVE32F-NEXT: andi a2, a1, 64 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_7 -; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load16 +; RV32ZVE32F-NEXT: vmv.x.s t6, v10 +; RV32ZVE32F-NEXT: lw t5, 4(t6) +; RV32ZVE32F-NEXT: lw t6, 0(t6) +; RV32ZVE32F-NEXT: andi s0, t0, 64 +; RV32ZVE32F-NEXT: beqz s0, .LBB57_7 +; RV32ZVE32F-NEXT: .LBB57_15: # %cond.load16 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a2, v10 -; RV32ZVE32F-NEXT: lw a6, 4(a2) -; RV32ZVE32F-NEXT: lw a7, 0(a2) -; RV32ZVE32F-NEXT: andi a1, a1, -128 -; RV32ZVE32F-NEXT: bnez a1, .LBB57_8 -; RV32ZVE32F-NEXT: j .LBB57_9 +; RV32ZVE32F-NEXT: vmv.x.s s1, v10 +; RV32ZVE32F-NEXT: lw s0, 4(s1) +; RV32ZVE32F-NEXT: lw s1, 0(s1) +; RV32ZVE32F-NEXT: andi t0, t0, -128 +; RV32ZVE32F-NEXT: beqz t0, .LBB57_8 +; RV32ZVE32F-NEXT: .LBB57_16: # %cond.load19 +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: lw t0, 4(a3) +; RV32ZVE32F-NEXT: lw a3, 0(a3) +; RV32ZVE32F-NEXT: .LBB57_17: # %else20 +; RV32ZVE32F-NEXT: sw a2, 0(a0) +; RV32ZVE32F-NEXT: sw a1, 4(a0) +; RV32ZVE32F-NEXT: sw a5, 8(a0) +; RV32ZVE32F-NEXT: sw a4, 12(a0) +; RV32ZVE32F-NEXT: sw a7, 16(a0) +; RV32ZVE32F-NEXT: sw a6, 20(a0) +; RV32ZVE32F-NEXT: sw t2, 24(a0) +; RV32ZVE32F-NEXT: sw t1, 28(a0) +; RV32ZVE32F-NEXT: sw t4, 32(a0) +; RV32ZVE32F-NEXT: sw t3, 36(a0) +; RV32ZVE32F-NEXT: sw t6, 40(a0) +; RV32ZVE32F-NEXT: sw t5, 44(a0) +; RV32ZVE32F-NEXT: sw s1, 48(a0) +; RV32ZVE32F-NEXT: sw s0, 52(a0) +; RV32ZVE32F-NEXT: sw a3, 56(a0) +; RV32ZVE32F-NEXT: sw t0, 60(a0) +; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: addi sp, sp, 16 +; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_baseidx_v8i64: ; RV64ZVE32F: # %bb.0: @@ -11974,34 +11939,23 @@ ; ; RV32ZVE32F-LABEL: mgather_baseidx_v8f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -64 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 64 -; RV32ZVE32F-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset ra, -4 -; RV32ZVE32F-NEXT: .cfi_offset s0, -8 -; RV32ZVE32F-NEXT: addi s0, sp, 64 -; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 -; RV32ZVE32F-NEXT: andi sp, sp, -32 -; RV32ZVE32F-NEXT: lw a3, 0(a2) -; RV32ZVE32F-NEXT: lw a4, 8(a2) -; RV32ZVE32F-NEXT: lw a5, 16(a2) -; RV32ZVE32F-NEXT: lw a6, 24(a2) -; RV32ZVE32F-NEXT: lw a7, 56(a2) -; RV32ZVE32F-NEXT: lw t0, 48(a2) -; RV32ZVE32F-NEXT: lw t1, 40(a2) -; RV32ZVE32F-NEXT: lw a2, 32(a2) -; RV32ZVE32F-NEXT: sw a7, 28(sp) -; RV32ZVE32F-NEXT: sw t0, 24(sp) -; RV32ZVE32F-NEXT: sw t1, 20(sp) -; RV32ZVE32F-NEXT: sw a2, 16(sp) -; RV32ZVE32F-NEXT: sw a6, 12(sp) -; RV32ZVE32F-NEXT: sw a5, 8(sp) -; RV32ZVE32F-NEXT: sw a4, 4(sp) -; RV32ZVE32F-NEXT: sw a3, 0(sp) -; RV32ZVE32F-NEXT: mv a2, sp +; RV32ZVE32F-NEXT: lw a3, 56(a2) +; RV32ZVE32F-NEXT: lw a4, 48(a2) +; RV32ZVE32F-NEXT: lw a5, 40(a2) +; RV32ZVE32F-NEXT: lw a6, 32(a2) +; RV32ZVE32F-NEXT: lw a7, 24(a2) +; RV32ZVE32F-NEXT: lw t0, 0(a2) +; RV32ZVE32F-NEXT: lw t1, 8(a2) +; RV32ZVE32F-NEXT: lw a2, 16(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vle32.v v8, (a2) +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma @@ -12043,10 +11997,6 @@ ; RV32ZVE32F-NEXT: fsd fa5, 40(a0) ; RV32ZVE32F-NEXT: fsd fa6, 48(a0) ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) -; RV32ZVE32F-NEXT: addi sp, s0, -64 -; RV32ZVE32F-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: addi sp, sp, 64 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -5684,22 +5684,19 @@ ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -96 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 96 -; RV32ZVE32F-NEXT: sw ra, 92(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s0, 88(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s2, 84(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s3, 80(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s4, 76(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s5, 72(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s6, 68(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s7, 64(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s8, 60(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s9, 56(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s10, 52(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s11, 48(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset ra, -4 -; RV32ZVE32F-NEXT: .cfi_offset s0, -8 +; RV32ZVE32F-NEXT: addi sp, sp, -48 +; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 48 +; RV32ZVE32F-NEXT: sw s0, 44(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s1, 40(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s2, 36(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s3, 32(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s4, 28(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s5, 24(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s6, 20(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s7, 16(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: sw s8, 12(sp) # 4-byte Folded Spill +; RV32ZVE32F-NEXT: .cfi_offset s0, -4 +; RV32ZVE32F-NEXT: .cfi_offset s1, -8 ; RV32ZVE32F-NEXT: .cfi_offset s2, -12 ; RV32ZVE32F-NEXT: .cfi_offset s3, -16 ; RV32ZVE32F-NEXT: .cfi_offset s4, -20 @@ -5707,12 +5704,6 @@ ; RV32ZVE32F-NEXT: .cfi_offset s6, -28 ; RV32ZVE32F-NEXT: .cfi_offset s7, -32 ; RV32ZVE32F-NEXT: .cfi_offset s8, -36 -; RV32ZVE32F-NEXT: .cfi_offset s9, -40 -; RV32ZVE32F-NEXT: .cfi_offset s10, -44 -; RV32ZVE32F-NEXT: .cfi_offset s11, -48 -; RV32ZVE32F-NEXT: addi s0, sp, 96 -; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 -; RV32ZVE32F-NEXT: andi sp, sp, -32 ; RV32ZVE32F-NEXT: lw a3, 60(a0) ; RV32ZVE32F-NEXT: lw a4, 56(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) @@ -5725,55 +5716,51 @@ ; RV32ZVE32F-NEXT: lw t4, 24(a0) ; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: lw t6, 16(a0) -; RV32ZVE32F-NEXT: lw s3, 12(a0) -; RV32ZVE32F-NEXT: lw s2, 8(a0) -; RV32ZVE32F-NEXT: lw s5, 4(a0) -; RV32ZVE32F-NEXT: lw s4, 0(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a2) -; RV32ZVE32F-NEXT: lw s6, 8(a2) -; RV32ZVE32F-NEXT: lw s7, 16(a2) -; RV32ZVE32F-NEXT: lw s8, 24(a2) -; RV32ZVE32F-NEXT: lw s9, 56(a2) -; RV32ZVE32F-NEXT: lw s10, 48(a2) -; RV32ZVE32F-NEXT: lw s11, 40(a2) -; RV32ZVE32F-NEXT: lw a2, 32(a2) -; RV32ZVE32F-NEXT: sw s9, 28(sp) -; RV32ZVE32F-NEXT: sw s10, 24(sp) -; RV32ZVE32F-NEXT: sw s11, 20(sp) -; RV32ZVE32F-NEXT: sw a2, 16(sp) -; RV32ZVE32F-NEXT: sw s8, 12(sp) -; RV32ZVE32F-NEXT: sw s7, 8(sp) -; RV32ZVE32F-NEXT: sw s6, 4(sp) -; RV32ZVE32F-NEXT: sw a0, 0(sp) -; RV32ZVE32F-NEXT: mv a0, sp +; RV32ZVE32F-NEXT: lw s1, 12(a0) +; RV32ZVE32F-NEXT: lw s0, 8(a0) +; RV32ZVE32F-NEXT: lw s2, 56(a2) +; RV32ZVE32F-NEXT: lw s3, 48(a2) +; RV32ZVE32F-NEXT: lw s4, 40(a2) +; RV32ZVE32F-NEXT: lw s5, 32(a2) +; RV32ZVE32F-NEXT: lw s6, 24(a2) +; RV32ZVE32F-NEXT: lw s7, 0(a2) +; RV32ZVE32F-NEXT: lw s8, 8(a2) +; RV32ZVE32F-NEXT: lw a2, 16(a2) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vle32.v v8, (a0) +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s8 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s3 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s2 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_10 +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: bnez a2, .LBB51_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_11 +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: bnez a0, .LBB51_11 ; RV32ZVE32F-NEXT: .LBB51_2: # %else2 -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_12 +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: bnez a0, .LBB51_12 ; RV32ZVE32F-NEXT: .LBB51_3: # %else4 -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_13 +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: bnez a0, .LBB51_13 ; RV32ZVE32F-NEXT: .LBB51_4: # %else6 -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_14 +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: bnez a0, .LBB51_14 ; RV32ZVE32F-NEXT: .LBB51_5: # %else8 -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_15 +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: bnez a0, .LBB51_15 ; RV32ZVE32F-NEXT: .LBB51_6: # %else10 -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB51_16 +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB51_16 ; RV32ZVE32F-NEXT: .LBB51_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB51_9 ; RV32ZVE32F-NEXT: .LBB51_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5782,75 +5769,73 @@ ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a3, 4(a0) ; RV32ZVE32F-NEXT: .LBB51_9: # %else14 -; RV32ZVE32F-NEXT: addi sp, s0, -96 -; RV32ZVE32F-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s2, 84(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s3, 80(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s4, 76(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s5, 72(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s6, 68(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s7, 64(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s8, 60(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s9, 56(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s10, 52(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s11, 48(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: addi sp, sp, 96 +; RV32ZVE32F-NEXT: lw s0, 44(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s1, 40(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s2, 36(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s3, 32(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s4, 28(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s5, 24(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s6, 20(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s7, 16(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: lw s8, 12(sp) # 4-byte Folded Reload +; RV32ZVE32F-NEXT: addi sp, sp, 48 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store +; RV32ZVE32F-NEXT: lw a2, 4(a0) +; RV32ZVE32F-NEXT: lw a0, 0(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 -; RV32ZVE32F-NEXT: sw s5, 4(a1) -; RV32ZVE32F-NEXT: sw s4, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_2 +; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: sw a2, 4(s2) +; RV32ZVE32F-NEXT: sw a0, 0(s2) +; RV32ZVE32F-NEXT: andi a0, a1, 2 +; RV32ZVE32F-NEXT: beqz a0, .LBB51_2 ; RV32ZVE32F-NEXT: .LBB51_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: sw s3, 4(a1) -; RV32ZVE32F-NEXT: sw s2, 0(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_3 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: sw s1, 4(a0) +; RV32ZVE32F-NEXT: sw s0, 0(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 +; RV32ZVE32F-NEXT: beqz a0, .LBB51_3 ; RV32ZVE32F-NEXT: .LBB51_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: sw t6, 0(a1) -; RV32ZVE32F-NEXT: sw t5, 4(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_4 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 +; RV32ZVE32F-NEXT: beqz a0, .LBB51_4 ; RV32ZVE32F-NEXT: .LBB51_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: sw t4, 0(a1) -; RV32ZVE32F-NEXT: sw t3, 4(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_5 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 +; RV32ZVE32F-NEXT: beqz a0, .LBB51_5 ; RV32ZVE32F-NEXT: .LBB51_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: sw t2, 0(a1) -; RV32ZVE32F-NEXT: sw t1, 4(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_6 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 +; RV32ZVE32F-NEXT: beqz a0, .LBB51_6 ; RV32ZVE32F-NEXT: .LBB51_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: sw t0, 0(a1) -; RV32ZVE32F-NEXT: sw a7, 4(a1) -; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: beqz a1, .LBB51_7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw a7, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 64 +; RV32ZVE32F-NEXT: beqz a0, .LBB51_7 ; RV32ZVE32F-NEXT: .LBB51_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 -; RV32ZVE32F-NEXT: vmv.x.s a1, v10 -; RV32ZVE32F-NEXT: sw a6, 0(a1) -; RV32ZVE32F-NEXT: sw a5, 4(a1) -; RV32ZVE32F-NEXT: andi a0, a0, -128 +; RV32ZVE32F-NEXT: vmv.x.s a0, v10 +; RV32ZVE32F-NEXT: sw a6, 0(a0) +; RV32ZVE32F-NEXT: sw a5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_8 ; RV32ZVE32F-NEXT: j .LBB51_9 ; @@ -10381,121 +10366,106 @@ ; ; RV32ZVE32F-LABEL: mscatter_baseidx_v8f64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi sp, sp, -64 -; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 64 -; RV32ZVE32F-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32ZVE32F-NEXT: .cfi_offset ra, -4 -; RV32ZVE32F-NEXT: .cfi_offset s0, -8 -; RV32ZVE32F-NEXT: addi s0, sp, 64 -; RV32ZVE32F-NEXT: .cfi_def_cfa s0, 0 -; RV32ZVE32F-NEXT: andi sp, sp, -32 -; RV32ZVE32F-NEXT: lw a2, 0(a1) -; RV32ZVE32F-NEXT: lw a3, 8(a1) -; RV32ZVE32F-NEXT: lw a4, 16(a1) -; RV32ZVE32F-NEXT: lw a5, 24(a1) -; RV32ZVE32F-NEXT: lw a6, 56(a1) -; RV32ZVE32F-NEXT: lw a7, 48(a1) -; RV32ZVE32F-NEXT: lw t0, 40(a1) -; RV32ZVE32F-NEXT: lw a1, 32(a1) -; RV32ZVE32F-NEXT: sw a6, 28(sp) -; RV32ZVE32F-NEXT: sw a7, 24(sp) -; RV32ZVE32F-NEXT: sw t0, 20(sp) -; RV32ZVE32F-NEXT: sw a1, 16(sp) -; RV32ZVE32F-NEXT: sw a5, 12(sp) -; RV32ZVE32F-NEXT: sw a4, 8(sp) -; RV32ZVE32F-NEXT: sw a3, 4(sp) -; RV32ZVE32F-NEXT: sw a2, 0(sp) -; RV32ZVE32F-NEXT: mv a1, sp +; RV32ZVE32F-NEXT: lw a2, 56(a1) +; RV32ZVE32F-NEXT: lw a3, 48(a1) +; RV32ZVE32F-NEXT: lw a4, 40(a1) +; RV32ZVE32F-NEXT: lw a5, 32(a1) +; RV32ZVE32F-NEXT: lw a6, 24(a1) +; RV32ZVE32F-NEXT: lw a7, 0(a1) +; RV32ZVE32F-NEXT: lw t0, 8(a1) +; RV32ZVE32F-NEXT: lw a1, 16(a1) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vle32.v v8, (a1) +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 ; RV32ZVE32F-NEXT: andi a1, a0, 1 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_10 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a0, 2 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_10 ; RV32ZVE32F-NEXT: .LBB90_2: # %else2 ; RV32ZVE32F-NEXT: andi a1, a0, 4 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_11 ; RV32ZVE32F-NEXT: .LBB90_3: # %else4 ; RV32ZVE32F-NEXT: andi a1, a0, 8 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_12 ; RV32ZVE32F-NEXT: .LBB90_4: # %else6 ; RV32ZVE32F-NEXT: andi a1, a0, 16 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_13 ; RV32ZVE32F-NEXT: .LBB90_5: # %else8 ; RV32ZVE32F-NEXT: andi a1, a0, 32 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_14 ; RV32ZVE32F-NEXT: .LBB90_6: # %else10 ; RV32ZVE32F-NEXT: andi a1, a0, 64 -; RV32ZVE32F-NEXT: bnez a1, .LBB90_16 +; RV32ZVE32F-NEXT: bnez a1, .LBB90_15 ; RV32ZVE32F-NEXT: .LBB90_7: # %else12 ; RV32ZVE32F-NEXT: andi a0, a0, -128 -; RV32ZVE32F-NEXT: beqz a0, .LBB90_9 -; RV32ZVE32F-NEXT: .LBB90_8: # %cond.store13 -; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 -; RV32ZVE32F-NEXT: vmv.x.s a0, v8 -; RV32ZVE32F-NEXT: fsd fa7, 0(a0) -; RV32ZVE32F-NEXT: .LBB90_9: # %else14 -; RV32ZVE32F-NEXT: addi sp, s0, -64 -; RV32ZVE32F-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32ZVE32F-NEXT: addi sp, sp, 64 +; RV32ZVE32F-NEXT: bnez a0, .LBB90_16 +; RV32ZVE32F-NEXT: .LBB90_8: # %else14 ; RV32ZVE32F-NEXT: ret -; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store +; RV32ZVE32F-NEXT: .LBB90_9: # %cond.store ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v8 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 -; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store1 +; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa1, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 4 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_3 -; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store3 +; RV32ZVE32F-NEXT: .LBB90_11: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa2, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 8 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_4 -; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store5 +; RV32ZVE32F-NEXT: .LBB90_12: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa3, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 16 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_5 -; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store7 +; RV32ZVE32F-NEXT: .LBB90_13: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 32 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_6 -; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store9 +; RV32ZVE32F-NEXT: .LBB90_14: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa5, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 64 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_7 -; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store11 +; RV32ZVE32F-NEXT: .LBB90_15: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6 ; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa6, 0(a1) ; RV32ZVE32F-NEXT: andi a0, a0, -128 -; RV32ZVE32F-NEXT: bnez a0, .LBB90_8 -; RV32ZVE32F-NEXT: j .LBB90_9 +; RV32ZVE32F-NEXT: beqz a0, .LBB90_8 +; RV32ZVE32F-NEXT: .LBB90_16: # %cond.store13 +; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7 +; RV32ZVE32F-NEXT: vmv.x.s a0, v8 +; RV32ZVE32F-NEXT: fsd fa7, 0(a0) +; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mscatter_baseidx_v8f64: ; RV64ZVE32F: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -103,26 +103,23 @@ define void @store_v6f16(ptr %p, <6 x half> %v) { ; RV32-LABEL: store_v6f16: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lh a2, 4(a1) -; RV32-NEXT: lhu a3, 0(a1) +; RV32-NEXT: lh a2, 20(a1) +; RV32-NEXT: lhu a3, 16(a1) ; RV32-NEXT: slli a2, a2, 16 ; RV32-NEXT: or a2, a3, a2 ; RV32-NEXT: lh a3, 12(a1) ; RV32-NEXT: lhu a4, 8(a1) -; RV32-NEXT: lh a5, 20(a1) -; RV32-NEXT: lhu a1, 16(a1) +; RV32-NEXT: lh a5, 4(a1) +; RV32-NEXT: lhu a1, 0(a1) ; RV32-NEXT: slli a3, a3, 16 ; RV32-NEXT: or a3, a4, a3 ; RV32-NEXT: slli a5, a5, 16 ; RV32-NEXT: or a1, a1, a5 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw a3, 4(sp) -; RV32-NEXT: sw a2, 0(sp) -; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: vslidedown.vi v9, v8, 2 @@ -131,7 +128,6 @@ ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: store_v6f16: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -5,29 +5,27 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: srli a1, a2, 5 -; RV32-NEXT: sb a1, 13(sp) ; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: slli a1, a2, 27 +; RV32-NEXT: vslide1down.vx v10, v8, a1 +; RV32-NEXT: slli a1, a2, 30 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a2, 29 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: slli a1, a2, 28 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 11(sp) -; RV32-NEXT: slli a1, a2, 29 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a2, 27 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 10(sp) -; RV32-NEXT: slli a2, a2, 30 -; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 9(sp) -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vle8.v v10, (a1) +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: srli a2, a2, 5 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -39,34 +37,31 @@ ; RV32-NEXT: vse32.v v10, (a0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a3) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vv_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: srli a1, a2, 5 -; RV64-NEXT: sb a1, 13(sp) ; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: sb a1, 8(sp) -; RV64-NEXT: slli a1, a2, 59 +; RV64-NEXT: vslide1down.vx v10, v8, a1 +; RV64-NEXT: slli a1, a2, 62 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a2, 61 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: slli a1, a2, 60 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 11(sp) -; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a2, 59 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 10(sp) -; RV64-NEXT: slli a2, a2, 62 -; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 9(sp) -; RV64-NEXT: addi a1, sp, 8 -; RV64-NEXT: vle8.v v10, (a1) +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: srli a2, a2, 5 +; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -77,7 +72,6 @@ ; RV64-NEXT: vse64.v v10, (a0) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a3) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %va = load <6 x i32>, ptr %a %vb = load <6 x i32>, ptr %b @@ -90,29 +84,27 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: srli a1, a2, 5 -; RV32-NEXT: sb a1, 13(sp) ; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: slli a1, a2, 27 +; RV32-NEXT: vslide1down.vx v10, v8, a1 +; RV32-NEXT: slli a1, a2, 30 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a2, 29 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: slli a1, a2, 28 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 11(sp) -; RV32-NEXT: slli a1, a2, 29 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a2, 27 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 10(sp) -; RV32-NEXT: slli a2, a2, 30 -; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 9(sp) -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vle8.v v10, (a1) +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: srli a2, a2, 5 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -124,34 +116,31 @@ ; RV32-NEXT: vse32.v v10, (a0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a3) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vx_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: srli a1, a2, 5 -; RV64-NEXT: sb a1, 13(sp) ; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: sb a1, 8(sp) -; RV64-NEXT: slli a1, a2, 59 +; RV64-NEXT: vslide1down.vx v10, v8, a1 +; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: slli a1, a2, 60 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 11(sp) -; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a2, 59 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 10(sp) -; RV64-NEXT: slli a2, a2, 62 -; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 9(sp) -; RV64-NEXT: addi a1, sp, 8 -; RV64-NEXT: vle8.v v10, (a1) +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: srli a2, a2, 5 +; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -162,7 +151,6 @@ ; RV64-NEXT: vse64.v v10, (a0) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a3) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %vb = load <6 x i32>, ptr %b %ahead = insertelement <6 x i32> poison, i32 %a, i32 0 @@ -176,29 +164,27 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vi_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: srli a0, a1, 5 -; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: andi a0, a1, 1 -; RV32-NEXT: sb a0, 8(sp) -; RV32-NEXT: slli a0, a1, 27 +; RV32-NEXT: vslide1down.vx v10, v8, a0 +; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: slli a0, a1, 29 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: slli a0, a1, 28 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: slli a0, a1, 29 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: slli a0, a1, 27 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: slli a1, a1, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vle8.v v10, (a0) +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: srli a1, a1, 5 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -210,34 +196,31 @@ ; RV32-NEXT: vse32.v v10, (a0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a2) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vi_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: srli a0, a1, 5 -; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: andi a0, a1, 1 -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: slli a0, a1, 59 +; RV64-NEXT: vslide1down.vx v10, v8, a0 +; RV64-NEXT: slli a0, a1, 62 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: slli a0, a1, 61 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 12(sp) +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: slli a0, a1, 60 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: slli a0, a1, 61 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: slli a0, a1, 59 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: slli a1, a1, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 9(sp) -; RV64-NEXT: addi a0, sp, 8 -; RV64-NEXT: vle8.v v10, (a0) +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: srli a1, a1, 5 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -248,7 +231,6 @@ ; RV64-NEXT: vse64.v v10, (a0) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a2) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %vb = load <6 x i32>, ptr %b %a = insertelement <6 x i32> poison, i32 -1, i32 0 @@ -263,29 +245,27 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: srli a1, a2, 5 -; RV32-NEXT: sb a1, 13(sp) ; RV32-NEXT: andi a1, a2, 1 -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: slli a1, a2, 27 +; RV32-NEXT: vslide1down.vx v10, v8, a1 +; RV32-NEXT: slli a1, a2, 30 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 12(sp) +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a2, 29 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: slli a1, a2, 28 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 11(sp) -; RV32-NEXT: slli a1, a2, 29 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a2, 27 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 10(sp) -; RV32-NEXT: slli a2, a2, 30 -; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 9(sp) -; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vle8.v v10, (a1) +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: srli a2, a2, 5 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -297,34 +277,31 @@ ; RV32-NEXT: vse32.v v10, (a0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a3) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vv_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: srli a1, a2, 5 -; RV64-NEXT: sb a1, 13(sp) ; RV64-NEXT: andi a1, a2, 1 -; RV64-NEXT: sb a1, 8(sp) -; RV64-NEXT: slli a1, a2, 59 +; RV64-NEXT: vslide1down.vx v10, v8, a1 +; RV64-NEXT: slli a1, a2, 62 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 12(sp) +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: slli a1, a2, 60 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 11(sp) -; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a2, 59 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 10(sp) -; RV64-NEXT: slli a2, a2, 62 -; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 9(sp) -; RV64-NEXT: addi a1, sp, 8 -; RV64-NEXT: vle8.v v10, (a1) +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: srli a2, a2, 5 +; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -335,7 +312,6 @@ ; RV64-NEXT: vse64.v v10, (a0) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a3) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %va = load <6 x float>, ptr %a %vb = load <6 x float>, ptr %b @@ -348,29 +324,27 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: srli a0, a1, 5 -; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: andi a0, a1, 1 -; RV32-NEXT: sb a0, 8(sp) -; RV32-NEXT: slli a0, a1, 27 +; RV32-NEXT: vslide1down.vx v10, v8, a0 +; RV32-NEXT: slli a0, a1, 30 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: slli a0, a1, 29 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: slli a0, a1, 28 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: slli a0, a1, 29 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: slli a0, a1, 27 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: slli a1, a1, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vle8.v v10, (a0) +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: srli a1, a1, 5 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -382,34 +356,31 @@ ; RV32-NEXT: vse32.v v10, (a0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a2) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vx_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: srli a0, a1, 5 -; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: andi a0, a1, 1 -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: slli a0, a1, 59 +; RV64-NEXT: vslide1down.vx v10, v8, a0 +; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 12(sp) +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: slli a0, a1, 61 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: slli a0, a1, 60 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: slli a0, a1, 61 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: slli a0, a1, 59 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: slli a1, a1, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 9(sp) -; RV64-NEXT: addi a0, sp, 8 -; RV64-NEXT: vle8.v v10, (a0) +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: srli a1, a1, 5 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -420,7 +391,6 @@ ; RV64-NEXT: vse64.v v10, (a0) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a2) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %vb = load <6 x float>, ptr %b %ahead = insertelement <6 x float> poison, float %a, i32 0 @@ -434,29 +404,27 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vfpzero_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: srli a0, a1, 5 -; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: andi a0, a1, 1 -; RV32-NEXT: sb a0, 8(sp) -; RV32-NEXT: slli a0, a1, 27 +; RV32-NEXT: vslide1down.vx v10, v8, a0 +; RV32-NEXT: slli a0, a1, 30 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: slli a0, a1, 29 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: slli a0, a1, 28 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: slli a0, a1, 29 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: slli a0, a1, 27 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: slli a1, a1, 30 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 9(sp) -; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vle8.v v10, (a0) +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: srli a1, a1, 5 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vslide1down.vx v10, v10, a0 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -468,34 +436,31 @@ ; RV32-NEXT: vse32.v v10, (a0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vse32.v v8, (a2) -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vselect_vfpzero_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: srli a0, a1, 5 -; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: andi a0, a1, 1 -; RV64-NEXT: sb a0, 8(sp) -; RV64-NEXT: slli a0, a1, 59 +; RV64-NEXT: vslide1down.vx v10, v8, a0 +; RV64-NEXT: slli a0, a1, 62 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: slli a0, a1, 61 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 12(sp) +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: slli a0, a1, 60 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: slli a0, a1, 61 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: slli a0, a1, 59 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: slli a1, a1, 62 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 9(sp) -; RV64-NEXT: addi a0, sp, 8 -; RV64-NEXT: vle8.v v10, (a0) +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: srli a1, a1, 5 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vslide1down.vx v10, v10, a0 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma @@ -506,7 +471,6 @@ ; RV64-NEXT: vse64.v v10, (a0) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vse32.v v8, (a2) -; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %vb = load <6 x float>, ptr %b %a = insertelement <6 x float> poison, float 0.0, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -7,242 +7,230 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; RV32-LABEL: vector_deinterleave_v16i1_v32i1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vfirst.m a0, v0 ; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: sb a0, 16(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV32-NEXT: vmv.x.s a0, v0 -; RV32-NEXT: slli a1, a0, 17 +; RV32-NEXT: slli a1, a0, 29 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 23(sp) -; RV32-NEXT: slli a1, a0, 19 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: slli a1, a0, 27 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 22(sp) -; RV32-NEXT: slli a1, a0, 21 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: slli a1, a0, 25 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 21(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: slli a1, a0, 23 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 20(sp) -; RV32-NEXT: slli a1, a0, 25 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: slli a1, a0, 21 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 19(sp) -; RV32-NEXT: slli a1, a0, 27 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: slli a1, a0, 19 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 18(sp) -; RV32-NEXT: slli a1, a0, 29 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: slli a1, a0, 17 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 17(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v8, v0, 2 +; RV32-NEXT: vslidedown.vi v9, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vfirst.m a1, v8 +; RV32-NEXT: vfirst.m a1, v9 ; RV32-NEXT: seqz a1, a1 -; RV32-NEXT: sb a1, 24(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: slli a2, a1, 17 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: slli a2, a1, 29 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 31(sp) -; RV32-NEXT: slli a2, a1, 19 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 27 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 30(sp) -; RV32-NEXT: slli a2, a1, 21 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 25 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 29(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: slli a2, a1, 23 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 28(sp) -; RV32-NEXT: slli a2, a1, 25 -; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 27(sp) -; RV32-NEXT: slli a2, a1, 27 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 21 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 26(sp) -; RV32-NEXT: slli a2, a1, 29 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 19 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 25(sp) -; RV32-NEXT: slli a2, a0, 16 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 17 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 7(sp) -; RV32-NEXT: slli a2, a0, 18 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vand.vi v8, v8, 1 +; RV32-NEXT: vmsne.vi v0, v8, 0 +; RV32-NEXT: slli a2, a0, 30 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 6(sp) -; RV32-NEXT: slli a2, a0, 20 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 28 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 5(sp) -; RV32-NEXT: slli a2, a0, 22 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 26 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 4(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: slli a2, a0, 24 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 3(sp) -; RV32-NEXT: slli a2, a0, 26 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 22 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 2(sp) -; RV32-NEXT: slli a2, a0, 28 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 20 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 1(sp) -; RV32-NEXT: slli a0, a0, 30 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 18 +; RV32-NEXT: srli a2, a2, 31 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a0, a0, 16 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 0(sp) -; RV32-NEXT: slli a0, a1, 16 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a0, a1, 30 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 15(sp) -; RV32-NEXT: slli a0, a1, 18 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 14(sp) -; RV32-NEXT: slli a0, a1, 20 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a0, a1, 28 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 13(sp) -; RV32-NEXT: slli a0, a1, 22 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a0, a1, 26 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 12(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: slli a0, a1, 24 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 11(sp) -; RV32-NEXT: slli a0, a1, 26 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a0, a1, 22 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 10(sp) -; RV32-NEXT: slli a0, a1, 28 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a0, a1, 20 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a0, a1, 18 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 9(sp) -; RV32-NEXT: slli a1, a1, 30 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a1, a1, 16 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 8(sp) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vle8.v v9, (a0) +; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: vand.vi v8, v8, 1 -; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: vand.vi v8, v9, 1 ; RV32-NEXT: vmsne.vi v8, v8, 0 -; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: vector_deinterleave_v16i1_v32i1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -32 -; RV64-NEXT: .cfi_def_cfa_offset 32 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vfirst.m a0, v0 ; RV64-NEXT: seqz a0, a0 -; RV64-NEXT: sb a0, 16(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma ; RV64-NEXT: vmv.x.s a0, v0 -; RV64-NEXT: slli a1, a0, 49 +; RV64-NEXT: slli a1, a0, 61 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 23(sp) -; RV64-NEXT: slli a1, a0, 51 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: slli a1, a0, 59 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 22(sp) -; RV64-NEXT: slli a1, a0, 53 +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: slli a1, a0, 57 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 21(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: slli a1, a0, 55 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 20(sp) -; RV64-NEXT: slli a1, a0, 57 +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: slli a1, a0, 53 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 19(sp) -; RV64-NEXT: slli a1, a0, 59 +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: slli a1, a0, 51 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 18(sp) -; RV64-NEXT: slli a1, a0, 61 +; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: slli a1, a0, 49 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 17(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v8, v0, 2 +; RV64-NEXT: vslidedown.vi v9, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vfirst.m a1, v8 +; RV64-NEXT: vfirst.m a1, v9 ; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: sb a1, 24(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: slli a2, a1, 49 +; RV64-NEXT: vmv.x.s a1, v9 +; RV64-NEXT: slli a2, a1, 61 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 31(sp) -; RV64-NEXT: slli a2, a1, 51 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 59 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 30(sp) -; RV64-NEXT: slli a2, a1, 53 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 57 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 29(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a2 ; RV64-NEXT: slli a2, a1, 55 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 28(sp) -; RV64-NEXT: slli a2, a1, 57 -; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 27(sp) -; RV64-NEXT: slli a2, a1, 59 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 53 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 26(sp) -; RV64-NEXT: slli a2, a1, 61 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 51 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 25(sp) -; RV64-NEXT: slli a2, a0, 48 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 49 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 7(sp) -; RV64-NEXT: slli a2, a0, 50 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: vand.vi v8, v8, 1 +; RV64-NEXT: vmsne.vi v0, v8, 0 +; RV64-NEXT: slli a2, a0, 62 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 6(sp) -; RV64-NEXT: slli a2, a0, 52 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 60 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 5(sp) -; RV64-NEXT: slli a2, a0, 54 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 58 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 4(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a2 ; RV64-NEXT: slli a2, a0, 56 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 3(sp) -; RV64-NEXT: slli a2, a0, 58 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 54 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 2(sp) -; RV64-NEXT: slli a2, a0, 60 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 52 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 1(sp) -; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 50 +; RV64-NEXT: srli a2, a2, 63 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 0(sp) -; RV64-NEXT: slli a0, a1, 48 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a0, a1, 62 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 15(sp) -; RV64-NEXT: slli a0, a1, 50 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 14(sp) -; RV64-NEXT: slli a0, a1, 52 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a0, a1, 60 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 13(sp) -; RV64-NEXT: slli a0, a1, 54 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a0, a1, 58 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 12(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: slli a0, a1, 56 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 11(sp) -; RV64-NEXT: slli a0, a1, 58 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a0, a1, 54 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 10(sp) -; RV64-NEXT: slli a0, a1, 60 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a0, a1, 52 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a0, a1, 50 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 9(sp) -; RV64-NEXT: slli a1, a1, 62 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a1, a1, 48 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 8(sp) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: mv a0, sp -; RV64-NEXT: vle8.v v9, (a0) +; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: vand.vi v8, v8, 1 -; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: vand.vi v8, v9, 1 ; RV64-NEXT: vmsne.vi v8, v8, 0 -; RV64-NEXT: addi sp, sp, 32 ; RV64-NEXT: ret %retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec) ret {<16 x i1>, <16 x i1>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -7,248 +7,228 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) { ; RV32-LABEL: vector_interleave_v32i1_v16i1: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -64 -; RV32-NEXT: .cfi_def_cfa_offset 64 -; RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: addi s0, sp, 64 -; RV32-NEXT: .cfi_def_cfa s0, 0 -; RV32-NEXT: andi sp, sp, -32 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vfirst.m a0, v8 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: sb a0, 1(sp) ; RV32-NEXT: vfirst.m a0, v0 ; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: sb a0, 0(sp) +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-NEXT: vslide1down.vx v10, v8, a0 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vfirst.m a0, v8 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: slli a1, a0, 16 +; RV32-NEXT: vmv.x.s a0, v0 +; RV32-NEXT: slli a1, a0, 30 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 31(sp) -; RV32-NEXT: vmv.x.s a1, v0 -; RV32-NEXT: slli a2, a1, 16 -; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 30(sp) -; RV32-NEXT: slli a2, a0, 17 +; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vsetivli zero, 0, e16, mf4, ta, ma +; RV32-NEXT: vmv.x.s a1, v8 +; RV32-NEXT: slli a3, a1, 30 +; RV32-NEXT: srli a3, a3, 31 +; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV32-NEXT: vslide1down.vx v8, v10, a3 +; RV32-NEXT: slli a2, a0, 29 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 29(sp) -; RV32-NEXT: slli a2, a1, 17 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 29 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 28(sp) -; RV32-NEXT: slli a2, a0, 18 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 28 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 27(sp) -; RV32-NEXT: slli a2, a1, 18 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 28 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 26(sp) -; RV32-NEXT: slli a2, a0, 19 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 27 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 25(sp) -; RV32-NEXT: slli a2, a1, 19 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 27 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 24(sp) -; RV32-NEXT: slli a2, a0, 20 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 26 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 23(sp) -; RV32-NEXT: slli a2, a1, 20 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 26 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 22(sp) -; RV32-NEXT: slli a2, a0, 21 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 25 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 21(sp) -; RV32-NEXT: slli a2, a1, 21 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 25 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 20(sp) -; RV32-NEXT: slli a2, a0, 22 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 24 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 19(sp) -; RV32-NEXT: slli a2, a1, 22 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 24 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 18(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: slli a2, a0, 23 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 17(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a2 ; RV32-NEXT: slli a2, a1, 23 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 16(sp) -; RV32-NEXT: slli a2, a0, 24 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 22 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 15(sp) -; RV32-NEXT: slli a2, a1, 24 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 22 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 14(sp) -; RV32-NEXT: slli a2, a0, 25 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 21 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 13(sp) -; RV32-NEXT: slli a2, a1, 25 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 21 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 12(sp) -; RV32-NEXT: slli a2, a0, 26 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 20 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 11(sp) -; RV32-NEXT: slli a2, a1, 26 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 20 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 10(sp) -; RV32-NEXT: slli a2, a0, 27 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 19 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 9(sp) -; RV32-NEXT: slli a2, a1, 27 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 19 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 8(sp) -; RV32-NEXT: slli a2, a0, 28 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 18 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 7(sp) -; RV32-NEXT: slli a2, a1, 28 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 18 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 6(sp) -; RV32-NEXT: slli a2, a0, 29 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a0, 17 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 5(sp) -; RV32-NEXT: slli a2, a1, 29 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a2, a1, 17 ; RV32-NEXT: srli a2, a2, 31 -; RV32-NEXT: sb a2, 4(sp) -; RV32-NEXT: slli a0, a0, 30 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: slli a0, a0, 16 ; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: sb a0, 3(sp) -; RV32-NEXT: slli a1, a1, 30 +; RV32-NEXT: vslide1down.vx v8, v8, a0 +; RV32-NEXT: slli a1, a1, 16 ; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: sb a1, 2(sp) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: mv a1, sp -; RV32-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV32-NEXT: vle8.v v8, (a1) +; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: vand.vi v8, v8, 1 ; RV32-NEXT: vmsne.vi v0, v8, 0 -; RV32-NEXT: addi sp, s0, -64 -; RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 64 ; RV32-NEXT: ret ; ; RV64-LABEL: vector_interleave_v32i1_v16i1: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: .cfi_def_cfa_offset 64 -; RV64-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: addi s0, sp, 64 -; RV64-NEXT: .cfi_def_cfa s0, 0 -; RV64-NEXT: andi sp, sp, -32 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vfirst.m a0, v8 -; RV64-NEXT: seqz a0, a0 -; RV64-NEXT: sb a0, 1(sp) ; RV64-NEXT: vfirst.m a0, v0 ; RV64-NEXT: seqz a0, a0 -; RV64-NEXT: sb a0, 0(sp) +; RV64-NEXT: li a2, 32 +; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-NEXT: vslide1down.vx v10, v8, a0 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vfirst.m a0, v8 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: slli a1, a0, 48 +; RV64-NEXT: vmv.x.s a0, v0 +; RV64-NEXT: slli a1, a0, 62 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 31(sp) -; RV64-NEXT: vmv.x.s a1, v0 -; RV64-NEXT: slli a2, a1, 48 -; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 30(sp) -; RV64-NEXT: slli a2, a0, 49 +; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vsetivli zero, 0, e16, mf4, ta, ma +; RV64-NEXT: vmv.x.s a1, v8 +; RV64-NEXT: slli a3, a1, 62 +; RV64-NEXT: srli a3, a3, 63 +; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma +; RV64-NEXT: vslide1down.vx v8, v10, a3 +; RV64-NEXT: slli a2, a0, 61 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 29(sp) -; RV64-NEXT: slli a2, a1, 49 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 61 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 28(sp) -; RV64-NEXT: slli a2, a0, 50 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 60 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 27(sp) -; RV64-NEXT: slli a2, a1, 50 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 60 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 26(sp) -; RV64-NEXT: slli a2, a0, 51 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 59 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 25(sp) -; RV64-NEXT: slli a2, a1, 51 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 59 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 24(sp) -; RV64-NEXT: slli a2, a0, 52 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 58 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 23(sp) -; RV64-NEXT: slli a2, a1, 52 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 58 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 22(sp) -; RV64-NEXT: slli a2, a0, 53 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 57 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 21(sp) -; RV64-NEXT: slli a2, a1, 53 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 57 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 20(sp) -; RV64-NEXT: slli a2, a0, 54 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 56 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 19(sp) -; RV64-NEXT: slli a2, a1, 54 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 56 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 18(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a2 ; RV64-NEXT: slli a2, a0, 55 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 17(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a2 ; RV64-NEXT: slli a2, a1, 55 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 16(sp) -; RV64-NEXT: slli a2, a0, 56 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 54 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 15(sp) -; RV64-NEXT: slli a2, a1, 56 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 54 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 14(sp) -; RV64-NEXT: slli a2, a0, 57 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 53 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 13(sp) -; RV64-NEXT: slli a2, a1, 57 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 53 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 12(sp) -; RV64-NEXT: slli a2, a0, 58 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 52 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 11(sp) -; RV64-NEXT: slli a2, a1, 58 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 52 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 10(sp) -; RV64-NEXT: slli a2, a0, 59 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 51 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 9(sp) -; RV64-NEXT: slli a2, a1, 59 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 51 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 8(sp) -; RV64-NEXT: slli a2, a0, 60 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 50 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 7(sp) -; RV64-NEXT: slli a2, a1, 60 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 50 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 6(sp) -; RV64-NEXT: slli a2, a0, 61 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a0, 49 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 5(sp) -; RV64-NEXT: slli a2, a1, 61 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a2, a1, 49 ; RV64-NEXT: srli a2, a2, 63 -; RV64-NEXT: sb a2, 4(sp) -; RV64-NEXT: slli a0, a0, 62 +; RV64-NEXT: vslide1down.vx v8, v8, a2 +; RV64-NEXT: slli a0, a0, 48 ; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: sb a0, 3(sp) -; RV64-NEXT: slli a1, a1, 62 +; RV64-NEXT: vslide1down.vx v8, v8, a0 +; RV64-NEXT: slli a1, a1, 48 ; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: sb a1, 2(sp) -; RV64-NEXT: li a0, 32 -; RV64-NEXT: mv a1, sp -; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; RV64-NEXT: vle8.v v8, (a1) +; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: vand.vi v8, v8, 1 ; RV64-NEXT: vmsne.vi v0, v8, 0 -; RV64-NEXT: addi sp, s0, -64 -; RV64-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 64 ; RV64-NEXT: ret %res = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b) ret <32 x i1> %res diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -597,60 +597,69 @@ ; ; RV32MV-LABEL: test_srem_vec: ; RV32MV: # %bb.0: -; RV32MV-NEXT: addi sp, sp, -64 -; RV32MV-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32MV-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32MV-NEXT: sw s2, 52(sp) # 4-byte Folded Spill -; RV32MV-NEXT: sw s3, 48(sp) # 4-byte Folded Spill -; RV32MV-NEXT: sw s4, 44(sp) # 4-byte Folded Spill -; RV32MV-NEXT: sw s5, 40(sp) # 4-byte Folded Spill -; RV32MV-NEXT: sw s6, 36(sp) # 4-byte Folded Spill -; RV32MV-NEXT: addi s0, sp, 64 -; RV32MV-NEXT: andi sp, sp, -32 -; RV32MV-NEXT: mv s2, a0 -; RV32MV-NEXT: lw a0, 8(a0) -; RV32MV-NEXT: lw a1, 4(s2) -; RV32MV-NEXT: lbu a2, 12(s2) -; RV32MV-NEXT: slli a3, a0, 31 -; RV32MV-NEXT: srli a4, a1, 1 -; RV32MV-NEXT: or s3, a4, a3 -; RV32MV-NEXT: slli a3, a2, 30 -; RV32MV-NEXT: srli a4, a0, 2 -; RV32MV-NEXT: or s4, a4, a3 -; RV32MV-NEXT: srli a0, a0, 1 +; RV32MV-NEXT: addi sp, sp, -48 +; RV32MV-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32MV-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32MV-NEXT: csrr a1, vlenb +; RV32MV-NEXT: slli a1, a1, 1 +; RV32MV-NEXT: sub sp, sp, a1 +; RV32MV-NEXT: mv s0, a0 +; RV32MV-NEXT: lbu a0, 12(a0) +; RV32MV-NEXT: lw a1, 8(s0) +; RV32MV-NEXT: slli a2, a0, 30 +; RV32MV-NEXT: lw a3, 4(s0) +; RV32MV-NEXT: srli s1, a1, 2 +; RV32MV-NEXT: or s1, s1, a2 +; RV32MV-NEXT: slli a2, a1, 31 +; RV32MV-NEXT: srli a4, a3, 1 +; RV32MV-NEXT: or s2, a4, a2 +; RV32MV-NEXT: srli a0, a0, 2 ; RV32MV-NEXT: slli a0, a0, 31 -; RV32MV-NEXT: srai s5, a0, 31 -; RV32MV-NEXT: srli a2, a2, 2 -; RV32MV-NEXT: slli a2, a2, 31 -; RV32MV-NEXT: lw a0, 0(s2) -; RV32MV-NEXT: srai s6, a2, 31 +; RV32MV-NEXT: srai s3, a0, 31 +; RV32MV-NEXT: srli a1, a1, 1 ; RV32MV-NEXT: slli a1, a1, 31 +; RV32MV-NEXT: lw a0, 0(s0) +; RV32MV-NEXT: srai s4, a1, 31 +; RV32MV-NEXT: slli a1, a3, 31 ; RV32MV-NEXT: srai a1, a1, 31 ; RV32MV-NEXT: li a2, 6 ; RV32MV-NEXT: li a3, 0 ; RV32MV-NEXT: call __moddi3@plt -; RV32MV-NEXT: sw a1, 4(sp) -; RV32MV-NEXT: sw a0, 0(sp) -; RV32MV-NEXT: li a2, -5 -; RV32MV-NEXT: li a3, -1 -; RV32MV-NEXT: mv a0, s4 -; RV32MV-NEXT: mv a1, s6 -; RV32MV-NEXT: call __moddi3@plt -; RV32MV-NEXT: sw a1, 20(sp) -; RV32MV-NEXT: sw a0, 16(sp) +; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 +; RV32MV-NEXT: vslide1down.vx v8, v8, a1 +; RV32MV-NEXT: addi a0, sp, 16 +; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; RV32MV-NEXT: li a2, 7 -; RV32MV-NEXT: mv a0, s3 -; RV32MV-NEXT: mv a1, s5 +; RV32MV-NEXT: mv a0, s2 +; RV32MV-NEXT: mv a1, s4 ; RV32MV-NEXT: li a3, 0 ; RV32MV-NEXT: call __moddi3@plt -; RV32MV-NEXT: sw a1, 12(sp) -; RV32MV-NEXT: sw a0, 8(sp) +; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32MV-NEXT: addi a2, sp, 16 +; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 +; RV32MV-NEXT: vslide1down.vx v8, v8, a1 +; RV32MV-NEXT: addi a0, sp, 16 +; RV32MV-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; RV32MV-NEXT: li a2, -5 +; RV32MV-NEXT: li a3, -1 +; RV32MV-NEXT: mv a0, s1 +; RV32MV-NEXT: mv a1, s3 +; RV32MV-NEXT: call __moddi3@plt +; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32MV-NEXT: addi a2, sp, 16 +; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 +; RV32MV-NEXT: vslide1down.vx v8, v8, a1 +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 ; RV32MV-NEXT: li a0, 85 -; RV32MV-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32MV-NEXT: vmv.s.x v0, a0 -; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32MV-NEXT: mv a0, sp -; RV32MV-NEXT: vle32.v v8, (a0) ; RV32MV-NEXT: vmv.v.i v10, 1 ; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0 ; RV32MV-NEXT: vand.vv v8, v8, v10 @@ -668,14 +677,14 @@ ; RV32MV-NEXT: vmv.v.i v8, 0 ; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32MV-NEXT: vse32.v v8, (s2) +; RV32MV-NEXT: vse32.v v8, (s0) ; RV32MV-NEXT: vslidedown.vi v10, v8, 1 ; RV32MV-NEXT: vmv.x.s a0, v10 ; RV32MV-NEXT: vslidedown.vi v10, v8, 2 ; RV32MV-NEXT: vmv.x.s a1, v10 ; RV32MV-NEXT: slli a2, a1, 1 ; RV32MV-NEXT: sub a2, a2, a0 -; RV32MV-NEXT: sw a2, 4(s2) +; RV32MV-NEXT: sw a2, 4(s0) ; RV32MV-NEXT: vslidedown.vi v10, v8, 4 ; RV32MV-NEXT: vmv.x.s a0, v10 ; RV32MV-NEXT: srli a2, a0, 30 @@ -684,7 +693,7 @@ ; RV32MV-NEXT: slli a3, a3, 2 ; RV32MV-NEXT: or a2, a3, a2 ; RV32MV-NEXT: andi a2, a2, 7 -; RV32MV-NEXT: sb a2, 12(s2) +; RV32MV-NEXT: sb a2, 12(s0) ; RV32MV-NEXT: srli a1, a1, 31 ; RV32MV-NEXT: vslidedown.vi v8, v8, 3 ; RV32MV-NEXT: vmv.x.s a2, v8 @@ -693,70 +702,65 @@ ; RV32MV-NEXT: slli a0, a0, 2 ; RV32MV-NEXT: or a0, a1, a0 ; RV32MV-NEXT: or a0, a0, a2 -; RV32MV-NEXT: sw a0, 8(s2) -; RV32MV-NEXT: addi sp, s0, -64 -; RV32MV-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32MV-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32MV-NEXT: lw s2, 52(sp) # 4-byte Folded Reload -; RV32MV-NEXT: lw s3, 48(sp) # 4-byte Folded Reload -; RV32MV-NEXT: lw s4, 44(sp) # 4-byte Folded Reload -; RV32MV-NEXT: lw s5, 40(sp) # 4-byte Folded Reload -; RV32MV-NEXT: lw s6, 36(sp) # 4-byte Folded Reload -; RV32MV-NEXT: addi sp, sp, 64 +; RV32MV-NEXT: sw a0, 8(s0) +; RV32MV-NEXT: csrr a0, vlenb +; RV32MV-NEXT: slli a0, a0, 1 +; RV32MV-NEXT: add sp, sp, a0 +; RV32MV-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32MV-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32MV-NEXT: addi sp, sp, 48 ; RV32MV-NEXT: ret ; ; RV64MV-LABEL: test_srem_vec: ; RV64MV: # %bb.0: -; RV64MV-NEXT: addi sp, sp, -64 -; RV64MV-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64MV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill -; RV64MV-NEXT: addi s0, sp, 64 -; RV64MV-NEXT: andi sp, sp, -32 -; RV64MV-NEXT: lbu a1, 12(a0) +; RV64MV-NEXT: ld a1, 0(a0) ; RV64MV-NEXT: lwu a2, 8(a0) -; RV64MV-NEXT: slli a1, a1, 32 -; RV64MV-NEXT: ld a3, 0(a0) -; RV64MV-NEXT: or a1, a2, a1 -; RV64MV-NEXT: slli a1, a1, 29 -; RV64MV-NEXT: srai a1, a1, 31 -; RV64MV-NEXT: srli a4, a3, 2 -; RV64MV-NEXT: slli a2, a2, 62 -; RV64MV-NEXT: lui a5, %hi(.LCPI3_0) -; RV64MV-NEXT: ld a5, %lo(.LCPI3_0)(a5) -; RV64MV-NEXT: or a2, a2, a4 -; RV64MV-NEXT: slli a3, a3, 31 +; RV64MV-NEXT: srli a3, a1, 2 +; RV64MV-NEXT: lbu a4, 12(a0) +; RV64MV-NEXT: slli a5, a2, 62 +; RV64MV-NEXT: or a3, a5, a3 ; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: mulh a4, a3, a5 +; RV64MV-NEXT: slli a4, a4, 32 +; RV64MV-NEXT: or a2, a2, a4 +; RV64MV-NEXT: slli a2, a2, 29 +; RV64MV-NEXT: lui a4, %hi(.LCPI3_0) +; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4) +; RV64MV-NEXT: srai a2, a2, 31 +; RV64MV-NEXT: slli a1, a1, 31 +; RV64MV-NEXT: srai a1, a1, 31 +; RV64MV-NEXT: mulh a4, a2, a4 ; RV64MV-NEXT: srli a5, a4, 63 +; RV64MV-NEXT: srai a4, a4, 1 ; RV64MV-NEXT: add a4, a4, a5 -; RV64MV-NEXT: li a5, 6 -; RV64MV-NEXT: mul a4, a4, a5 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64MV-NEXT: srai a2, a2, 31 -; RV64MV-NEXT: sub a3, a3, a4 -; RV64MV-NEXT: sd a3, 0(sp) -; RV64MV-NEXT: mulh a3, a2, a5 -; RV64MV-NEXT: srli a4, a3, 63 -; RV64MV-NEXT: srai a3, a3, 1 -; RV64MV-NEXT: add a3, a3, a4 -; RV64MV-NEXT: slli a4, a3, 3 +; RV64MV-NEXT: add a2, a2, a4 +; RV64MV-NEXT: slli a4, a4, 2 +; RV64MV-NEXT: add a2, a2, a4 +; RV64MV-NEXT: mulh a4, a3, a5 +; RV64MV-NEXT: srli a5, a4, 63 +; RV64MV-NEXT: srai a4, a4, 1 +; RV64MV-NEXT: add a4, a4, a5 ; RV64MV-NEXT: lui a5, %hi(.LCPI3_2) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) -; RV64MV-NEXT: add a2, a2, a3 -; RV64MV-NEXT: sub a2, a2, a4 -; RV64MV-NEXT: sd a2, 8(sp) -; RV64MV-NEXT: mulh a2, a1, a5 -; RV64MV-NEXT: srli a3, a2, 63 -; RV64MV-NEXT: srai a2, a2, 1 -; RV64MV-NEXT: add a2, a2, a3 -; RV64MV-NEXT: slli a3, a2, 2 -; RV64MV-NEXT: add a1, a1, a2 -; RV64MV-NEXT: add a1, a1, a3 -; RV64MV-NEXT: sd a1, 16(sp) -; RV64MV-NEXT: mv a1, sp +; RV64MV-NEXT: add a3, a3, a4 +; RV64MV-NEXT: slli a4, a4, 3 +; RV64MV-NEXT: sub a3, a3, a4 +; RV64MV-NEXT: mulh a4, a1, a5 +; RV64MV-NEXT: srli a5, a4, 63 +; RV64MV-NEXT: add a4, a4, a5 +; RV64MV-NEXT: li a5, 6 +; RV64MV-NEXT: mul a4, a4, a5 +; RV64MV-NEXT: sub a1, a1, a4 ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64MV-NEXT: vle64.v v8, (a1) +; RV64MV-NEXT: vslide1down.vx v8, v8, a1 +; RV64MV-NEXT: vslide1down.vx v8, v8, a3 +; RV64MV-NEXT: vslide1down.vx v8, v8, a2 +; RV64MV-NEXT: vslide1down.vx v8, v8, a0 ; RV64MV-NEXT: lui a1, %hi(.LCPI3_3) ; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3) ; RV64MV-NEXT: vle64.v v10, (a1) @@ -784,10 +788,6 @@ ; RV64MV-NEXT: srli a3, a3, 62 ; RV64MV-NEXT: or a2, a3, a2 ; RV64MV-NEXT: sw a2, 8(a0) -; RV64MV-NEXT: addi sp, s0, -64 -; RV64MV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload -; RV64MV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload -; RV64MV-NEXT: addi sp, sp, 64 ; RV64MV-NEXT: ret %ld = load <3 x i33>, ptr %X %srem = srem <3 x i33> %ld, diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -521,43 +521,41 @@ ; ; RV32MV-LABEL: test_urem_vec: ; RV32MV: # %bb.0: -; RV32MV-NEXT: addi sp, sp, -16 ; RV32MV-NEXT: lw a1, 0(a0) ; RV32MV-NEXT: andi a2, a1, 2047 -; RV32MV-NEXT: sh a2, 8(sp) -; RV32MV-NEXT: slli a2, a1, 10 -; RV32MV-NEXT: srli a2, a2, 21 -; RV32MV-NEXT: sh a2, 10(sp) +; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32MV-NEXT: vslide1down.vx v8, v8, a2 ; RV32MV-NEXT: lbu a2, 4(a0) +; RV32MV-NEXT: slli a3, a1, 10 +; RV32MV-NEXT: srli a3, a3, 21 +; RV32MV-NEXT: vslide1down.vx v8, v8, a3 ; RV32MV-NEXT: slli a2, a2, 10 ; RV32MV-NEXT: srli a1, a1, 22 ; RV32MV-NEXT: or a1, a1, a2 ; RV32MV-NEXT: andi a1, a1, 2047 -; RV32MV-NEXT: sh a1, 12(sp) -; RV32MV-NEXT: addi a1, sp, 8 -; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32MV-NEXT: vle16.v v8, (a1) -; RV32MV-NEXT: vmv.v.i v9, 10 +; RV32MV-NEXT: vslide1down.vx v8, v8, a1 +; RV32MV-NEXT: lui a1, %hi(.LCPI4_0) +; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0) +; RV32MV-NEXT: vle16.v v9, (a1) +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 +; RV32MV-NEXT: vid.v v10 +; RV32MV-NEXT: vsub.vv v8, v8, v10 +; RV32MV-NEXT: vmul.vv v8, v8, v9 +; RV32MV-NEXT: vadd.vv v9, v8, v8 +; RV32MV-NEXT: vmv.v.i v10, 10 ; RV32MV-NEXT: li a1, 9 ; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma -; RV32MV-NEXT: vmv.s.x v9, a1 +; RV32MV-NEXT: vmv.s.x v10, a1 ; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32MV-NEXT: lui a1, %hi(.LCPI4_0) -; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0) -; RV32MV-NEXT: vle16.v v10, (a1) -; RV32MV-NEXT: vid.v v11 -; RV32MV-NEXT: vsub.vv v8, v8, v11 -; RV32MV-NEXT: vmul.vv v8, v8, v10 -; RV32MV-NEXT: vadd.vv v10, v8, v8 -; RV32MV-NEXT: vsll.vv v9, v10, v9 +; RV32MV-NEXT: vsll.vv v9, v9, v10 +; RV32MV-NEXT: li a1, 2047 +; RV32MV-NEXT: vand.vx v8, v8, a1 ; RV32MV-NEXT: vmv.v.i v10, 0 -; RV32MV-NEXT: li a1, 1 +; RV32MV-NEXT: li a2, 1 ; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma ; RV32MV-NEXT: vmv1r.v v11, v10 -; RV32MV-NEXT: vmv.s.x v11, a1 -; RV32MV-NEXT: li a1, 2047 +; RV32MV-NEXT: vmv.s.x v11, a2 ; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV32MV-NEXT: vand.vx v8, v8, a1 ; RV32MV-NEXT: lui a2, %hi(.LCPI4_1) ; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1) ; RV32MV-NEXT: vle16.v v12, (a2) @@ -581,47 +579,44 @@ ; RV32MV-NEXT: or a1, a2, a1 ; RV32MV-NEXT: or a1, a1, a3 ; RV32MV-NEXT: sw a1, 0(a0) -; RV32MV-NEXT: addi sp, sp, 16 ; RV32MV-NEXT: ret ; ; RV64MV-LABEL: test_urem_vec: ; RV64MV: # %bb.0: -; RV64MV-NEXT: addi sp, sp, -16 ; RV64MV-NEXT: lbu a1, 4(a0) ; RV64MV-NEXT: lwu a2, 0(a0) ; RV64MV-NEXT: slli a1, a1, 32 ; RV64MV-NEXT: or a1, a2, a1 -; RV64MV-NEXT: srli a2, a1, 22 -; RV64MV-NEXT: sh a2, 12(sp) ; RV64MV-NEXT: andi a2, a1, 2047 -; RV64MV-NEXT: sh a2, 8(sp) -; RV64MV-NEXT: slli a1, a1, 42 -; RV64MV-NEXT: srli a1, a1, 53 -; RV64MV-NEXT: sh a1, 10(sp) -; RV64MV-NEXT: addi a1, sp, 8 ; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV64MV-NEXT: vle16.v v8, (a1) -; RV64MV-NEXT: vmv.v.i v9, 10 +; RV64MV-NEXT: vslide1down.vx v8, v8, a2 +; RV64MV-NEXT: slli a2, a1, 42 +; RV64MV-NEXT: srli a2, a2, 53 +; RV64MV-NEXT: vslide1down.vx v8, v8, a2 +; RV64MV-NEXT: srli a1, a1, 22 +; RV64MV-NEXT: vslide1down.vx v8, v8, a1 +; RV64MV-NEXT: lui a1, %hi(.LCPI4_0) +; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0) +; RV64MV-NEXT: vle16.v v9, (a1) +; RV64MV-NEXT: vslide1down.vx v8, v8, a0 +; RV64MV-NEXT: vid.v v10 +; RV64MV-NEXT: vsub.vv v8, v8, v10 +; RV64MV-NEXT: vmul.vv v8, v8, v9 +; RV64MV-NEXT: vadd.vv v9, v8, v8 +; RV64MV-NEXT: vmv.v.i v10, 10 ; RV64MV-NEXT: li a1, 9 ; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma -; RV64MV-NEXT: vmv.s.x v9, a1 +; RV64MV-NEXT: vmv.s.x v10, a1 ; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64MV-NEXT: lui a1, %hi(.LCPI4_0) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0) -; RV64MV-NEXT: vle16.v v10, (a1) -; RV64MV-NEXT: vid.v v11 -; RV64MV-NEXT: vsub.vv v8, v8, v11 -; RV64MV-NEXT: vmul.vv v8, v8, v10 -; RV64MV-NEXT: vadd.vv v10, v8, v8 -; RV64MV-NEXT: vsll.vv v9, v10, v9 +; RV64MV-NEXT: vsll.vv v9, v9, v10 +; RV64MV-NEXT: li a1, 2047 +; RV64MV-NEXT: vand.vx v8, v8, a1 ; RV64MV-NEXT: vmv.v.i v10, 0 -; RV64MV-NEXT: li a1, 1 +; RV64MV-NEXT: li a2, 1 ; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, tu, ma ; RV64MV-NEXT: vmv1r.v v11, v10 -; RV64MV-NEXT: vmv.s.x v11, a1 -; RV64MV-NEXT: li a1, 2047 +; RV64MV-NEXT: vmv.s.x v11, a2 ; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; RV64MV-NEXT: vand.vx v8, v8, a1 ; RV64MV-NEXT: lui a2, %hi(.LCPI4_1) ; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_1) ; RV64MV-NEXT: vle16.v v12, (a2) @@ -645,7 +640,6 @@ ; RV64MV-NEXT: slli a1, a1, 31 ; RV64MV-NEXT: srli a1, a1, 63 ; RV64MV-NEXT: sb a1, 4(a0) -; RV64MV-NEXT: addi sp, sp, 16 ; RV64MV-NEXT: ret %ld = load <3 x i11>, ptr %X %urem = urem <3 x i11> %ld,