diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2957,10 +2957,34 @@
 static SDValue findVSplat(SDValue N) {
   SDValue Splat = N;
+
+  // A common pattern on RV32 is a splat that's been obfuscated through
+  // bitcasts, e.g. (insert_subvector nxv2i64 (bitcast v2i64 (extract_subvector
+  // v4i32 (vmv_v_x_vl nxv4i32))))
+  //
+  // The index doesn't matter here since it's inserting into undef.
+  if (Splat.getOpcode() == ISD::INSERT_SUBVECTOR &&
+      Splat.getOperand(0).isUndef())
+    Splat = Splat.getOperand(1);
+  Splat = peekThroughBitcasts(Splat);
+  // The index doesn't matter here because an extract of a splat will have the
+  // same value no matter where it extracts from.
+  if (Splat.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+    Splat = Splat.getOperand(0);
+
   if (Splat.getOpcode() != RISCVISD::VMV_V_X_VL ||
       !Splat.getOperand(0).isUndef())
     return SDValue();
   assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
+
+  // If the original element size is smaller than the splat's, then from the
+  // perspective of N's type it's not a splat but a repeated sequence, e.g.:
+  //
+  // (insert_subvector nxv4i32 (bitcast v4i32 (extract_subvector v2i64
+  // (vmv_v_x_vl nxv2i64))))
+  if (N.getSimpleValueType().getScalarSizeInBits() <
+      Splat.getSimpleValueType().getScalarSizeInBits())
+    return SDValue();
   return Splat;
 }
@@ -2969,6 +2993,10 @@
   if (!Splat)
     return false;
+
+  if (Splat.getSimpleValueType().getVectorElementType() !=
+      N.getSimpleValueType().getVectorElementType())
+    return false;
+
   SplatVal = Splat.getOperand(1);
   return true;
 }
@@ -2981,7 +3009,12 @@
   if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
     return false;
+
+  // Extract the constant from a RISCVISD::VMV_V_X_VL. This takes into account
+  // the fact that the VMV_V_X_VL could have a smaller element type that was
+  // then bitcasted to VT's larger element type.
+  const unsigned EltSize = N.getScalarValueSizeInBits();
   const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
+  assert(EltSize >= SplatEltSize);
   assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
          "Unexpected splat operand type");
@@ -2994,7 +3027,13 @@
   // sign-extending to (XLenVT -1).
   APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
-  int64_t SplatImm = SplatConst.getSExtValue();
+  // Get the splat value, which may be smaller than N's element size if it was
+  // bitcasted.
+ APInt Const(EltSize, 0); + for (unsigned I = 0; I < EltSize / SplatEltSize; I++) + Const.insertBits(SplatConst, I * SplatEltSize); + + int64_t SplatImm = Const.getSExtValue(); if (!ValidateImm(SplatImm)) return false; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1463,10 +1463,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v9, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -1570,10 +1567,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v9, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -1681,10 +1675,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -1788,10 +1779,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -1899,10 +1887,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -2006,10 +1991,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v12, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -2117,10 +2099,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2224,10 +2203,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: 
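To make the new constant handling above easier to review, here is a small, self-contained sketch (not part of the patch) of what the added APInt::insertBits loop computes: a narrower VMV_V_X_VL splat constant is repeated until it fills N's wider element size, and only then read back as a signed immediate for ValidateImm. The replicateSplat helper name below is hypothetical and used only for illustration.

// Sketch of the splat-constant replication added in the hunk at @@ -2994 above.
// Assumes an LLVM build; only llvm/ADT/APInt.h is needed.
#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>

using llvm::APInt;

// Hypothetical standalone version of the inline loop in the patch.
static int64_t replicateSplat(const APInt &SplatConst, unsigned SplatEltSize,
                              unsigned EltSize) {
  assert(EltSize >= SplatEltSize && EltSize % SplatEltSize == 0);
  APInt Const(EltSize, 0);
  // Repeat the narrow splat value across the wider element.
  for (unsigned I = 0; I < EltSize / SplatEltSize; I++)
    Const.insertBits(SplatConst, I * SplatEltSize);
  return Const.getSExtValue();
}

int main() {
  // An e32 splat of -1 viewed as e64 elements is still all-ones, so the
  // immediate is -1 and an all-ones pattern such as vnot can still match.
  assert(replicateSplat(APInt(32, -1, /*isSigned=*/true), 32, 64) == -1);
  // An e32 splat of 1 viewed as e64 elements is 0x0000000100000001, which is
  // not the immediate 1, so ValidateImm would reject it.
  assert(replicateSplat(APInt(32, 1), 32, 64) == 0x0000000100000001LL);
  return 0;
}

This replication is what the RV32 test diffs below rely on: the e32 vmv.v.i vN, -1 splat followed by vxor.vv is now recognised as an xor with the e64 immediate -1 and collapses into a single vnot.v.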
vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2335,10 +2311,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2442,10 +2415,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -2573,35 +2543,27 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 349525 ; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2645,7 +2607,7 @@ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -2658,7 +2620,7 @@ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload @@ -2681,14 +2643,17 @@ ; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli 
a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2715,18 +2680,8 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -2735,7 +2690,8 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2745,11 +2701,7 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -2787,12 +2739,15 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -2920,21 +2875,20 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmv8r.v v0, v16 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB35_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB35_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 2 @@ -2945,64 +2899,56 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; 
RV32-NEXT: vsrl.vi v16, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a3, 349525 ; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 @@ -3013,39 +2959,34 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: 
vsrl.vi v0, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vx v0, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v8, v24, 1 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 -; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -3055,15 +2996,14 @@ ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -4590,10 +4530,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v9, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -4697,10 +4634,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v9, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v9 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v9 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -4806,10 +4740,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi 
v10, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -4913,10 +4844,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v10, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -5022,10 +4950,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -5129,10 +5054,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v12, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 @@ -5238,10 +5160,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -5345,10 +5264,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -5454,10 +5370,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -5561,10 +5474,7 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 @@ -5690,35 +5600,27 @@ ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: 
csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: lui a3, 349525 ; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -5762,7 +5664,7 @@ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -5775,7 +5677,7 @@ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload @@ -5798,14 +5700,17 @@ ; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -5832,18 +5737,8 @@ ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -5852,7 +5747,8 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5862,11 +5758,7 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, 
v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -5904,12 +5796,15 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -6037,21 +5932,20 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: vmv8r.v v0, v16 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB71_2 -; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 +; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB71_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB71_2: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 2 @@ -6062,64 +5956,56 @@ ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 16 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a3, 349525 ; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: lui a3, 61681 ; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 @@ -6130,39 +6016,34 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: vor.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vx v0, v8, a1 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v8, v24, 1 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vi v24, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 -; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v16, 
v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -6172,15 +6053,14 @@ ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll @@ -475,10 +475,7 @@ ; LMULMAX2-RV32I-NEXT: li a1, 32 ; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v9, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 @@ -1172,10 +1169,7 @@ ; LMULMAX2-RV32I-NEXT: li a1, 32 ; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 @@ -1797,10 +1791,7 @@ ; LMULMAX2-RV32I-NEXT: li a1, 32 ; LMULMAX2-RV32I-NEXT: vsrl.vx v9, v8, a1 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v9 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v9, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v9 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 @@ -2464,10 +2455,7 @@ ; LMULMAX2-RV32I-NEXT: li a1, 32 ; LMULMAX2-RV32I-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX2-RV32I-NEXT: vor.vv v8, v8, v10 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 ; LMULMAX2-RV32I-NEXT: addi a1, a1, 1365 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1236,10 +1236,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: 
vxor.vv v8, v8, v10, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 @@ -1323,10 +1320,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: lui a1, 349525 @@ -1414,10 +1408,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 @@ -1501,10 +1492,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: lui a1, 349525 @@ -1592,10 +1580,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 @@ -1679,10 +1664,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: lui a1, 349525 @@ -1770,24 +1752,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1795,16 +1774,16 @@ ; RV32-NEXT: 
vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 @@ -1858,24 +1837,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -1883,16 +1859,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 @@ -1950,24 +1926,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: 
li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -1975,16 +1948,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 @@ -2038,24 +2011,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -2063,16 +2033,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li 
a0, 56 @@ -2130,16 +2100,15 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vslidedown.vi v24, v0, 2 @@ -2150,129 +2119,109 @@ ; RV32-NEXT: .LBB34_2: ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 56 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 349525 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv 
v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 209715 -; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: lui a4, 61681 -; RV32-NEXT: 
addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: lui a4, 4112 -; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 @@ -2294,32 +2243,18 @@ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -2330,42 +2265,36 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; 
RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2373,7 +2302,7 @@ ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -2392,7 +2321,8 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2495,7 +2425,7 @@ ; RV32-LABEL: vp_cttz_v32i64_unmasked: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv8r.v v0, v16 +; RV32-NEXT: vmv8r.v v24, v16 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: @@ -2504,68 +2434,59 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: lui a3, 349525 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a4, 209715 -; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 
2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: lui a4, 61681 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: lui a4, 4112 -; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: csrr a3, vlenb @@ -2578,29 +2499,24 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsub.vx v8, v24, a2 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vand.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 -; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -2617,8 +2533,7 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -3898,10 +3813,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; 
RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 @@ -3985,10 +3897,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsub.vx v9, v8, a1 -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v10 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: lui a1, 349525 @@ -4074,10 +3983,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsub.vx v10, v8, a1, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 @@ -4161,10 +4067,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsub.vx v10, v8, a1 -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v12, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v12 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: lui a1, 349525 @@ -4250,10 +4153,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t ; RV32-NEXT: lui a1, 349525 @@ -4337,10 +4237,7 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsub.vx v12, v8, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: lui a1, 349525 @@ -4426,24 +4323,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: 
vsrl.vi v8, v8, 2, v0.t @@ -4451,16 +4345,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 @@ -4514,24 +4408,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -4539,16 +4430,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 @@ -4604,24 +4495,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: 
lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t @@ -4629,16 +4517,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 @@ -4692,24 +4580,21 @@ ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a1, 349525 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: li a2, 32 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 209715 +; RV32-NEXT: addi a1, a1, 819 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v24, v8, v16 ; RV32-NEXT: vsrl.vi v8, v8, 2 @@ -4717,16 +4602,16 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 61681 +; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: lui a2, 4112 -; RV32-NEXT: addi a2, a2, 257 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: addi a1, a1, 257 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli zero, a0, e64, 
m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 @@ -4782,16 +4667,15 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vslidedown.vi v24, v0, 2 @@ -4802,129 +4686,109 @@ ; RV32-NEXT: .LBB70_2: ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t +; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 56 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 56 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 349525 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v 
v16, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 209715 -; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 56 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv 
v16, v8, v16, v0.t -; RV32-NEXT: lui a4, 61681 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: lui a4, 4112 -; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 @@ -4946,32 +4810,18 @@ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a0, a0, a3 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v16, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vnot.v v16, v16, v0.t ; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -4982,42 +4832,36 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: 
vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5025,7 +4869,7 @@ ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 @@ -5044,7 +4888,8 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -5147,7 +4992,7 @@ ; RV32-LABEL: vp_cttz_zero_undef_v32i64_unmasked: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv8r.v v0, v16 +; RV32-NEXT: vmv8r.v v24, v16 ; RV32-NEXT: mv a1, a0 ; RV32-NEXT: bltu a0, a2, .LBB71_2 ; RV32-NEXT: # %bb.1: @@ -5156,68 +5001,59 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a2 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a4, 349525 -; RV32-NEXT: addi a4, a4, 1365 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: lui a3, 349525 +; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a4, 209715 -; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a4 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, 
v16 +; RV32-NEXT: vand.vv v16, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: lui a4, 61681 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: lui a4, 4112 -; RV32-NEXT: addi a4, a4, 257 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: lui a3, 4112 +; RV32-NEXT: addi a3, a3, 257 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v24 +; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1 ; RV32-NEXT: csrr a3, vlenb @@ -5230,29 +5066,24 @@ ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vxor.vv v8, v0, v8 -; RV32-NEXT: vsub.vx v0, v0, a2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsub.vx v8, v24, a2 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vand.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 -; RV32-NEXT: vand.vv v0, v8, v16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v0 ; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -5269,8 +5100,7 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -460,10 +460,7 @@ ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32I-NEXT: li a1, 1 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 @@ -545,23 +542,20 @@ ; LMULMAX2-RV32F: # %bb.0: ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0 -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 -; LMULMAX2-RV32F-NEXT: vsub.vv v9, v9, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX2-RV32F-NEXT: vand.vv v9, v8, v9 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v9 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v9, v10, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v9, v8 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v9 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v9, a1 +; LMULMAX2-RV32F-NEXT: vsub.vx v9, v10, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v9, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -589,21 +583,18 @@ ; LMULMAX2-RV32D: # %bb.0: ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vmv.v.i v9, 0 -; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 -; LMULMAX2-RV32D-NEXT: vsub.vv v9, v9, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX2-RV32D-NEXT: vand.vv v9, v8, v9 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v9, v9 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v9, v9, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsub.vx v9, v9, a1 +; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v9, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -626,46 +617,24 @@ ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_v2i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 -; LMULMAX8-RV32-NEXT: 
vsub.vv v9, v9, v8 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-RV32-NEXT: fsrm a1 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 -; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v2i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v9, v8, v9 -; LMULMAX8-RV64-NEXT: fsrmi a1, 1 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v9, v9 -; LMULMAX8-RV64-NEXT: fsrm a1 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vsrl.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: li a1, 1023 -; LMULMAX8-RV64-NEXT: vsub.vx v9, v9, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 64 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v9, a1, v0 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v2i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX8-NEXT: vle64.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX8-NEXT: vand.vv v9, v8, v9 +; LMULMAX8-NEXT: fsrmi a1, 1 +; LMULMAX8-NEXT: vfcvt.f.xu.v v9, v9 +; LMULMAX8-NEXT: fsrm a1 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vsrl.vx v9, v9, a1 +; LMULMAX8-NEXT: li a1, 1023 +; LMULMAX8-NEXT: vsub.vx v9, v9, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 64 +; LMULMAX8-NEXT: vmerge.vxm v8, v9, a1, v0 +; LMULMAX8-NEXT: vse64.v v8, (a0) +; LMULMAX8-NEXT: ret ; ; ZVBB-LABEL: cttz_v2i64: ; ZVBB: # %bb.0: @@ -1144,10 +1113,7 @@ ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32I-NEXT: li a1, 1 ; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v12, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v12 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 @@ -1229,23 +1195,20 @@ ; LMULMAX2-RV32F: # %bb.0: ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 -; LMULMAX2-RV32F-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v12, v10 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v12, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v8 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v12, v10 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v10, a1 +; LMULMAX2-RV32F-NEXT: vsub.vx v10, v12, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vi v0, v8, 0 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 +; 
LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -1273,21 +1236,18 @@ ; LMULMAX2-RV32D: # %bb.0: ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10 -; LMULMAX2-RV32D-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1 +; LMULMAX2-RV32D-NEXT: vmseq.vi v0, v8, 0 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -1310,46 +1270,24 @@ ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_v4i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 -; LMULMAX8-RV32-NEXT: vsub.vv v10, v10, v8 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-RV32-NEXT: fsrm a1 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 -; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_v4i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v10, v8, v10 -; LMULMAX8-RV64-NEXT: fsrmi a1, 1 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v10, v10 -; LMULMAX8-RV64-NEXT: fsrm a1 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vsrl.vx v10, v10, a1 -; LMULMAX8-RV64-NEXT: li a1, 1023 -; LMULMAX8-RV64-NEXT: vsub.vx v10, v10, a1 -; LMULMAX8-RV64-NEXT: vmseq.vi v0, v8, 0 -; LMULMAX8-RV64-NEXT: li a1, 64 -; LMULMAX8-RV64-NEXT: vmerge.vxm v8, v10, a1, v0 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_v4i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX8-NEXT: vle64.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX8-NEXT: vand.vv v10, v8, v10 +; LMULMAX8-NEXT: fsrmi a1, 1 +; LMULMAX8-NEXT: vfcvt.f.xu.v v10, v10 +; LMULMAX8-NEXT: fsrm a1 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vsrl.vx v10, v10, a1 +; LMULMAX8-NEXT: li a1, 1023 +; LMULMAX8-NEXT: vsub.vx v10, v10, a1 +; LMULMAX8-NEXT: vmseq.vi v0, v8, 0 +; LMULMAX8-NEXT: li a1, 64 +; LMULMAX8-NEXT: vmerge.vxm v8, v10, a1, v0 +; LMULMAX8-NEXT: vse64.v v8, (a0) +; 
LMULMAX8-NEXT: ret ; ; ZVBB-LABEL: cttz_v4i64: ; ZVBB: # %bb.0: @@ -1777,10 +1715,7 @@ ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32I-NEXT: li a1, 1 ; LMULMAX2-RV32I-NEXT: vsub.vx v9, v8, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v10, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v10 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32I-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 @@ -1862,10 +1797,7 @@ ; LMULMAX2-RV32F: # %bb.0: ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0 -; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vsub.vv v9, v9, v8 +; LMULMAX2-RV32F-NEXT: vrsub.vi v9, v8, 0 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma @@ -1899,10 +1831,7 @@ ; LMULMAX2-RV32D: # %bb.0: ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vmv.v.i v9, 0 -; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vsub.vv v9, v9, v8 +; LMULMAX2-RV32D-NEXT: vrsub.vi v9, v8, 0 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 @@ -1930,40 +1859,21 @@ ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_zero_undef_v2i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v9, 0 -; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vsub.vv v9, v9, v8 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-RV32-NEXT: fsrm a1 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_zero_undef_v2i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v9, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v9 -; LMULMAX8-RV64-NEXT: fsrmi a1, 1 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-RV64-NEXT: fsrm a1 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: li a1, 1023 -; LMULMAX8-RV64-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_zero_undef_v2i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX8-NEXT: vle64.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v9, v8, 0 +; LMULMAX8-NEXT: vand.vv v8, v8, v9 +; LMULMAX8-NEXT: fsrmi a1, 1 +; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX8-NEXT: fsrm a1 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX8-NEXT: li a1, 1023 +; LMULMAX8-NEXT: vsub.vx v8, v8, a1 +; LMULMAX8-NEXT: vse64.v v8, (a0) +; LMULMAX8-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v2i64: ; 
ZVBB: # %bb.0: @@ -2418,10 +2328,7 @@ ; LMULMAX2-RV32I-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32I-NEXT: li a1, 1 ; LMULMAX2-RV32I-NEXT: vsub.vx v10, v8, a1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vmv.v.i v12, -1 -; LMULMAX2-RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32I-NEXT: vxor.vv v8, v8, v12 +; LMULMAX2-RV32I-NEXT: vnot.v v8, v8 ; LMULMAX2-RV32I-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32I-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32I-NEXT: lui a1, 349525 @@ -2503,10 +2410,7 @@ ; LMULMAX2-RV32F: # %bb.0: ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32F-NEXT: vrsub.vi v10, v8, 0 ; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -2540,10 +2444,7 @@ ; LMULMAX2-RV32D: # %bb.0: ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32D-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vsub.vv v10, v10, v8 +; LMULMAX2-RV32D-NEXT: vrsub.vi v10, v8, 0 ; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 @@ -2571,40 +2472,21 @@ ; LMULMAX2-RV64D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64D-NEXT: ret ; -; LMULMAX8-RV32-LABEL: cttz_zero_undef_v4i64: -; LMULMAX8-RV32: # %bb.0: -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0 -; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vsub.vv v10, v10, v8 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10 -; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-RV32-NEXT: fsrm a1 -; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV32-NEXT: ret -; -; LMULMAX8-RV64-LABEL: cttz_zero_undef_v4i64: -; LMULMAX8-RV64: # %bb.0: -; LMULMAX8-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX8-RV64-NEXT: vrsub.vi v10, v8, 0 -; LMULMAX8-RV64-NEXT: vand.vv v8, v8, v10 -; LMULMAX8-RV64-NEXT: fsrmi a1, 1 -; LMULMAX8-RV64-NEXT: vfcvt.f.xu.v v8, v8 -; LMULMAX8-RV64-NEXT: fsrm a1 -; LMULMAX8-RV64-NEXT: li a1, 52 -; LMULMAX8-RV64-NEXT: vsrl.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: li a1, 1023 -; LMULMAX8-RV64-NEXT: vsub.vx v8, v8, a1 -; LMULMAX8-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX8-RV64-NEXT: ret +; LMULMAX8-LABEL: cttz_zero_undef_v4i64: +; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX8-NEXT: vle64.v v8, (a0) +; LMULMAX8-NEXT: vrsub.vi v10, v8, 0 +; LMULMAX8-NEXT: vand.vv v8, v8, v10 +; LMULMAX8-NEXT: fsrmi a1, 1 +; LMULMAX8-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX8-NEXT: fsrm a1 +; LMULMAX8-NEXT: li a1, 52 +; LMULMAX8-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX8-NEXT: li a1, 1023 +; LMULMAX8-NEXT: vsub.vx v8, v8, a1 +; LMULMAX8-NEXT: vse64.v v8, (a0) +; LMULMAX8-NEXT: ret ; ; ZVBB-LABEL: cttz_zero_undef_v4i64: ; ZVBB: # %bb.0: @@ -2619,3 
+2501,6 @@ store <4 x i64> %c, ptr %x ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; LMULMAX8-RV32: {{.*}} +; LMULMAX8-RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -522,408 +522,216 @@ declare <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i1>, i32) define <2 x i64> @fshr_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshr_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vx v11, v10, a1, v0.t -; RV32-NEXT: vsrl.vv v9, v9, v11, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v11, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v10, v10, v11, v0.t -; RV32-NEXT: vand.vx v10, v10, a1, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshr_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vand.vx v11, v10, a1, v0.t -; RV64-NEXT: vsrl.vv v9, v9, v11, v0.t -; RV64-NEXT: vnot.v v10, v10, v0.t -; RV64-NEXT: vand.vx v10, v10, a1, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshr_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vand.vx v11, v10, a1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t +; CHECK-NEXT: vnot.v v10, v10, v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret %res = call <2 x i64> @llvm.vp.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 %evl) ret <2 x i64> %res } declare <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i1>, i32) define <2 x i64> @fshl_v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshl_v2i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vx v11, v10, a1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v11, v0.t -; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v11, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vxor.vv v10, v10, v11, v0.t -; RV32-NEXT: vand.vx v10, v10, a1, v0.t -; RV32-NEXT: vsrl.vi v9, v9, 1, v0.t -; RV32-NEXT: vsrl.vv v9, v9, v10, v0.t -; RV32-NEXT: vor.vv v8, v8, v9, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshl_v2i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vand.vx v11, v10, a1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v11, v0.t -; RV64-NEXT: vnot.v v10, v10, v0.t -; RV64-NEXT: vand.vx v10, v10, a1, v0.t -; RV64-NEXT: vsrl.vi v9, v9, 1, v0.t -; RV64-NEXT: vsrl.vv v9, v9, v10, v0.t -; RV64-NEXT: vor.vv v8, v8, v9, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshl_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: vand.vx v11, v10, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v11, v0.t +; CHECK-NEXT: vnot.v v10, v10, 
v0.t +; CHECK-NEXT: vand.vx v10, v10, a1, v0.t +; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t +; CHECK-NEXT: vsrl.vv v9, v9, v10, v0.t +; CHECK-NEXT: vor.vv v8, v8, v9, v0.t +; CHECK-NEXT: ret %res = call <2 x i64> @llvm.vp.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i1> %m, i32 %evl) ret <2 x i64> %res } declare <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>, <4 x i1>, i32) define <4 x i64> @fshr_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshr_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vx v14, v12, a1, v0.t -; RV32-NEXT: vsrl.vv v10, v10, v14, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v14, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v12, v12, v14, v0.t -; RV32-NEXT: vand.vx v12, v12, a1, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshr_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vand.vx v14, v12, a1, v0.t -; RV64-NEXT: vsrl.vv v10, v10, v14, v0.t -; RV64-NEXT: vnot.v v12, v12, v0.t -; RV64-NEXT: vand.vx v12, v12, a1, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v12, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshr_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vand.vx v14, v12, a1, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v14, v0.t +; CHECK-NEXT: vnot.v v12, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v12, v0.t +; CHECK-NEXT: vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret %res = call <4 x i64> @llvm.vp.fshr.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 %evl) ret <4 x i64> %res } declare <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>, <4 x i1>, i32) define <4 x i64> @fshl_v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshl_v4i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vx v14, v12, a1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v14, v0.t -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v14, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vxor.vv v12, v12, v14, v0.t -; RV32-NEXT: vand.vx v12, v12, a1, v0.t -; RV32-NEXT: vsrl.vi v10, v10, 1, v0.t -; RV32-NEXT: vsrl.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshl_v4i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vand.vx v14, v12, a1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v14, v0.t -; RV64-NEXT: vnot.v v12, v12, v0.t -; RV64-NEXT: vand.vx v12, v12, a1, v0.t -; RV64-NEXT: vsrl.vi v10, v10, 1, v0.t -; RV64-NEXT: vsrl.vv v10, v10, v12, v0.t -; RV64-NEXT: vor.vv v8, v8, v10, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshl_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; CHECK-NEXT: vand.vx v14, v12, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v14, v0.t +; CHECK-NEXT: vnot.v v12, v12, v0.t +; CHECK-NEXT: vand.vx v12, v12, a1, v0.t +; CHECK-NEXT: vsrl.vi v10, v10, 1, v0.t +; CHECK-NEXT: vsrl.vv v10, v10, v12, v0.t +; CHECK-NEXT: 
vor.vv v8, v8, v10, v0.t +; CHECK-NEXT: ret %res = call <4 x i64> @llvm.vp.fshl.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i1> %m, i32 %evl) ret <4 x i64> %res } declare <7 x i64> @llvm.vp.fshr.v7i64(<7 x i64>, <7 x i64>, <7 x i64>, <7 x i1>, i32) define <7 x i64> @fshr_v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshr_v7i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vx v20, v16, a1, v0.t -; RV32-NEXT: vsrl.vv v12, v12, v20, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v20, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshr_v7i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vand.vx v20, v16, a1, v0.t -; RV64-NEXT: vsrl.vv v12, v12, v20, v0.t -; RV64-NEXT: vnot.v v16, v16, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshr_v7i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vand.vx v20, v16, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t +; CHECK-NEXT: vnot.v v16, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret %res = call <7 x i64> @llvm.vp.fshr.v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> %m, i32 %evl) ret <7 x i64> %res } declare <7 x i64> @llvm.vp.fshl.v7i64(<7 x i64>, <7 x i64>, <7 x i64>, <7 x i1>, i32) define <7 x i64> @fshl_v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshl_v7i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vx v20, v16, a1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v20, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v20, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vsrl.vi v12, v12, 1, v0.t -; RV32-NEXT: vsrl.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshl_v7i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vand.vx v20, v16, a1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v20, v0.t -; RV64-NEXT: vnot.v v16, v16, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsrl.vi v12, v12, 1, v0.t -; RV64-NEXT: vsrl.vv v12, v12, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshl_v7i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vand.vx v20, v16, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t +; CHECK-NEXT: vnot.v v16, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret %res = call <7 x i64> @llvm.vp.fshl.v7i64(<7 x i64> %a, <7 x i64> %b, <7 x i64> %c, <7 x i1> %m, i32 
%evl) ret <7 x i64> %res } declare <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i1>, i32) define <8 x i64> @fshr_v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshr_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vx v20, v16, a1, v0.t -; RV32-NEXT: vsrl.vv v12, v12, v20, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v20, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vsll.vi v8, v8, 1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshr_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vand.vx v20, v16, a1, v0.t -; RV64-NEXT: vsrl.vv v12, v12, v20, v0.t -; RV64-NEXT: vnot.v v16, v16, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsll.vi v8, v8, 1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshr_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vand.vx v20, v16, a1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v20, v0.t +; CHECK-NEXT: vnot.v v16, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret %res = call <8 x i64> @llvm.vp.fshr.v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 %evl) ret <8 x i64> %res } declare <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>, <8 x i1>, i32) define <8 x i64> @fshl_v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshl_v8i64: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vx v20, v16, a1, v0.t -; RV32-NEXT: vsll.vv v8, v8, v20, v0.t -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v20, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vxor.vv v16, v16, v20, v0.t -; RV32-NEXT: vand.vx v16, v16, a1, v0.t -; RV32-NEXT: vsrl.vi v12, v12, 1, v0.t -; RV32-NEXT: vsrl.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: fshl_v8i64: -; RV64: # %bb.0: -; RV64-NEXT: li a1, 63 -; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vand.vx v20, v16, a1, v0.t -; RV64-NEXT: vsll.vv v8, v8, v20, v0.t -; RV64-NEXT: vnot.v v16, v16, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsrl.vi v12, v12, 1, v0.t -; RV64-NEXT: vsrl.vv v12, v12, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v12, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: fshl_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 63 +; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; CHECK-NEXT: vand.vx v20, v16, a1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v20, v0.t +; CHECK-NEXT: vnot.v v16, v16, v0.t +; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vsrl.vi v12, v12, 1, v0.t +; CHECK-NEXT: vsrl.vv v12, v12, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v12, v0.t +; CHECK-NEXT: ret %res = call <8 x i64> @llvm.vp.fshl.v8i64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c, <8 x i1> %m, i32 %evl) ret <8 x i64> %res } declare <16 x i64> @llvm.vp.fshr.v16i64(<16 x i64>, <16 x i64>, <16 x i64>, <16 x i1>, i32) define <16 x i64> 
@fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshr_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vx v8, v24, a0, v0.t -; RV32-NEXT: vsrl.vv v16, v16, v8, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v24, v8, v0.t -; RV32-NEXT: vand.vx v8, v8, a0, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsll.vi v24, v24, 1, v0.t -; RV32-NEXT: vsll.vv v8, v24, v8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: fshr_v16i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v24, a0, v0.t -; RV64-NEXT: vsrl.vv v16, v16, v8, v0.t -; RV64-NEXT: vnot.v v8, v24, v0.t -; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsll.vi v24, v24, 1, v0.t -; RV64-NEXT: vsll.vv v8, v24, v8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: fshr_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vand.vx v8, v24, a0, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t +; CHECK-NEXT: vnot.v v8, v24, v0.t +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vi v24, v24, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %res = call <16 x i64> 
@llvm.vp.fshr.v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 x i1> %m, i32 %evl) ret <16 x i64> %res } declare <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64>, <16 x i64>, <16 x i64>, <16 x i1>, i32) define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: fshl_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 -; RV32-NEXT: li a0, 63 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vx v8, v24, a0, v0.t -; RV32-NEXT: vsll.vv v8, v16, v8, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v16, v24, v16, v0.t -; RV32-NEXT: vand.vx v16, v16, a0, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v24, 1, v0.t -; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: fshl_v16i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vmv8r.v v16, v8 -; RV64-NEXT: li a0, 63 -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v24, a0, v0.t -; RV64-NEXT: vsll.vv v8, v16, v8, v0.t -; RV64-NEXT: vnot.v v16, v24, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v24, v24, 1, v0.t -; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: fshl_v16i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vand.vx v8, v24, a0, v0.t +; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t +; CHECK-NEXT: vnot.v v16, v24, v0.t +; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vi v24, v24, 1, v0.t +; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t +; CHECK-NEXT: vor.vv v8, v8, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %res = call <16 x i64> @llvm.vp.fshl.v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 x i1> %m, i32 %evl) ret <16 x i64> %res } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll @@ -759,32 +759,17 @@ ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; -; LMULMAX1-RV32-LABEL: splat_allones_with_use_v4i64: -; LMULMAX1-RV32: # %bb.0: -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.i v10, -1 -; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) -; LMULMAX1-RV32-NEXT: ret -; -; LMULMAX1-RV64-LABEL: splat_allones_with_use_v4i64: -; LMULMAX1-RV64: # %bb.0: -; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v8, (a1) -; LMULMAX1-RV64-NEXT: vle64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vadd.vi v8, v8, -1 -; LMULMAX1-RV64-NEXT: vadd.vi v9, v9, -1 -; LMULMAX1-RV64-NEXT: vse64.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v8, (a1) -; LMULMAX1-RV64-NEXT: ret +; LMULMAX1-LABEL: splat_allones_with_use_v4i64: +; LMULMAX1: # %bb.0: +; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; LMULMAX1-NEXT: addi a1, a0, 16 +; LMULMAX1-NEXT: vle64.v v8, (a1) +; LMULMAX1-NEXT: vle64.v v9, (a0) +; LMULMAX1-NEXT: vadd.vi v8, v8, -1 +; LMULMAX1-NEXT: vadd.vi v9, v9, -1 +; LMULMAX1-NEXT: vse64.v v9, (a0) +; LMULMAX1-NEXT: vse64.v v8, (a1) +; LMULMAX1-NEXT: ret %a = load <4 x i64>, ptr %x %b = add <4 x i64> %a, store <4 x i64> %b, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -397,43 +397,22 @@ declare <32 x i32> @llvm.masked.load.v32i32(ptr, i32, <32 x i1>, <32 x i32>) define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { -; RV32-LABEL: masked_load_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: addi a3, a1, 128 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v16, (a3) -; RV32-NEXT: vle64.v v0, (a1) -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v0, v24 -; RV32-NEXT: vmseq.vv v0, v16, v24 -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vle64.v v16, (a1), v0.t -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vle64.v v8, (a0), v0.t -; RV32-NEXT: vse64.v v8, (a2) -; RV32-NEXT: addi a0, a2, 128 -; RV32-NEXT: vse64.v v16, (a0) -; RV32-NEXT: ret -; -; RV64-LABEL: 
masked_load_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: addi a3, a1, 128 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v16, (a1) -; RV64-NEXT: vle64.v v24, (a3) -; RV64-NEXT: vmseq.vi v8, v16, 0 -; RV64-NEXT: vmseq.vi v0, v24, 0 -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vle64.v v16, (a1), v0.t -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vle64.v v8, (a0), v0.t -; RV64-NEXT: vse64.v v8, (a2) -; RV64-NEXT: addi a0, a2, 128 -; RV64-NEXT: vse64.v v16, (a0) -; RV64-NEXT: ret +; CHECK-LABEL: masked_load_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a3, a1, 128 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v24, (a3) +; CHECK-NEXT: vmseq.vi v8, v16, 0 +; CHECK-NEXT: vmseq.vi v0, v24, 0 +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v16, (a1), v0.t +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: vse64.v v8, (a2) +; CHECK-NEXT: addi a0, a2, 128 +; CHECK-NEXT: vse64.v v16, (a0) +; CHECK-NEXT: ret %m = load <32 x i64>, ptr %m_ptr %mask = icmp eq <32 x i64> %m, zeroinitializer %load = call <32 x i64> @llvm.masked.load.v32i64(ptr %a, i32 8, <32 x i1> %mask, <32 x i64> undef) @@ -547,3 +526,6 @@ ret void } declare <256 x i8> @llvm.masked.load.v256i8(ptr, i32, <256 x i1>, <256 x i8>) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -397,87 +397,44 @@ declare void @llvm.masked.store.v32i32.p0(<32 x i32>, ptr, i32, <32 x i1>) define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { -; RV32-LABEL: masked_store_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: sub sp, sp, a3 -; RV32-NEXT: addi a3, a2, 128 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v8, (a3) -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vle64.v v24, (a2) -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v1, v24, v8 -; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle64.v v24, (a2) -; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v0, v16, v8 -; RV32-NEXT: addi a0, a1, 128 -; RV32-NEXT: vse64.v v24, (a0), v0.t -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vse64.v v8, (a1), v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: masked_store_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 -; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; 
RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: vle64.v v16, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vi v8, v16, 0 -; RV64-NEXT: vse64.v v24, (a1), v0.t -; RV64-NEXT: addi a0, a1, 128 -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a0), v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: masked_store_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vi v8, v16, 0 +; CHECK-NEXT: vse64.v v24, (a1), v0.t +; CHECK-NEXT: addi a0, a1, 128 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %m = load <32 x i64>, ptr %m_ptr %mask = icmp eq <32 x i64> %m, zeroinitializer %val = load <32 x i64>, ptr %val_ptr @@ -683,3 +640,6 @@ ret void } declare void @llvm.masked.store.v256i8.p0(<256 x i8>, ptr, i32, <256 x i1>) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1526,50 +1526,26 @@ declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: vadd_vx_v32i64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v1, v0, 2 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB108_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB108_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vx_v32i64: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: li a2, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB108_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB108_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vx_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB108_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB108_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 %evl) @@ -1577,44 +1553,23 @@ } define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { -; RV32-LABEL: vadd_vi_v32i64_unmasked: -; RV32: # %bb.0: -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: li a2, 16 -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a2, .LBB109_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB109_2: -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24 -; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: sltu a0, a0, a1 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vi_v32i64_unmasked: -; RV64: # 
%bb.0: -; RV64-NEXT: li a2, 16 -; RV64-NEXT: mv a1, a0 -; RV64-NEXT: bltu a0, a2, .LBB109_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a1, 16 -; RV64-NEXT: .LBB109_2: -; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1 -; RV64-NEXT: addi a1, a0, -16 -; RV64-NEXT: sltu a0, a0, a1 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v16, v16, -1 -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vi_v32i64_unmasked: +; CHECK: # %bb.0: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: bltu a0, a2, .LBB109_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 16 +; CHECK-NEXT: .LBB109_2: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1 +; CHECK-NEXT: addi a1, a0, -16 +; CHECK-NEXT: sltu a0, a0, a1 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v16, -1 +; CHECK-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer %head = insertelement <32 x i1> poison, i1 true, i32 0 @@ -1626,20 +1581,11 @@ ; FIXME: We don't match vadd.vi on RV32. define <32 x i64> @vadd_vx_v32i64_evl12(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vadd_vx_v32i64_evl12: -; RV32: # %bb.0: -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v16, -1 -; RV32-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vx_v32i64_evl12: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 12, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vx_v32i64_evl12: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 12, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 12) @@ -1647,30 +1593,16 @@ } define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { -; RV32-LABEL: vadd_vx_v32i64_evl27: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v1, v0, 2 -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vadd.vv v16, v16, v24, v0.t -; RV32-NEXT: ret -; -; RV64-LABEL: vadd_vx_v32i64_evl27: -; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vadd.vi v16, v16, -1, v0.t -; RV64-NEXT: ret +; CHECK-LABEL: vadd_vx_v32i64_evl27: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: vsetivli zero, 11, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb 
= shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer %v = call <32 x i64> @llvm.vp.add.v32i64(<32 x i64> %va, <32 x i64> %vb, <32 x i1> %m, i32 27) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrol.ll @@ -879,12 +879,11 @@ ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vv v11, v10, v9 ; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a0 -; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vrsub.vi v11, v9, 0 ; CHECK-RV32-NEXT: vand.vv v11, v11, v10 ; CHECK-RV32-NEXT: vsrl.vv v11, v8, v11 ; CHECK-RV32-NEXT: vand.vv v9, v9, v10 @@ -916,19 +915,20 @@ define <1 x i64> @vrol_vx_v1i64(<1 x i64> %a, i64 %b) { ; CHECK-RV32-LABEL: vrol_vx_v1i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v9, 0 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v9, a0 +; CHECK-RV32-NEXT: vmv.v.x v9, a0 +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 ; CHECK-RV32-NEXT: li a1, 63 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; CHECK-RV32-NEXT: vmv.s.x v9, a1 -; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-RV32-NEXT: vand.vv v10, v10, v9 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v9 -; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: vmv.s.x v10, a1 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV32-NEXT: vand.vv v9, v9, v10 +; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vrol_vx_v1i64: @@ -958,32 +958,17 @@ declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) define <2 x i64> @vrol_vv_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-RV32-LABEL: vrol_vv_v2i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vand.vx v10, v9, a0 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v11, 0 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vv v9, v11, v9 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vrol_vv_v2i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV64-NEXT: vand.vx v10, v9, a0 -; CHECK-RV64-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 -; CHECK-RV64-NEXT: vand.vx v9, v9, a0 -; CHECK-RV64-NEXT: vsrl.vv v8, v8, v9 -; CHECK-RV64-NEXT: vor.vv v8, v10, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vrol_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: 
vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vrol_vv_v2i64: ; CHECK-ZVBB: # %bb.0: @@ -995,34 +980,18 @@ } define <2 x i64> @vrol_vx_v2i64(<2 x i64> %a, i64 %b) { -; CHECK-RV32-LABEL: vrol_vx_v2i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v9, a0 -; CHECK-RV32-NEXT: li a1, 63 -; CHECK-RV32-NEXT: vand.vx v9, v9, a1 -; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 -; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 -; CHECK-RV32-NEXT: vand.vx v10, v10, a1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v9, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vrol_vx_v2i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV64-NEXT: vmv.v.x v9, a0 -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vand.vx v10, v9, a0 -; CHECK-RV64-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 -; CHECK-RV64-NEXT: vand.vx v9, v9, a0 -; CHECK-RV64-NEXT: vsrl.vv v8, v8, v9 -; CHECK-RV64-NEXT: vor.vv v8, v10, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vrol_vx_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsll.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vrol_vx_v2i64: ; CHECK-ZVBB: # %bb.0: @@ -1038,32 +1007,17 @@ declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) define <4 x i64> @vrol_vv_v4i64(<4 x i64> %a, <4 x i64> %b) { -; CHECK-RV32-LABEL: vrol_vv_v4i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vand.vx v12, v10, a0 -; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v14, 0 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vv v10, v14, v10 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vrol_vv_v4i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV64-NEXT: vand.vx v12, v10, a0 -; CHECK-RV64-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 -; CHECK-RV64-NEXT: vand.vx v10, v10, a0 -; CHECK-RV64-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV64-NEXT: vor.vv v8, v12, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vrol_vv_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vrol_vv_v4i64: ; CHECK-ZVBB: # %bb.0: @@ -1075,34 +1029,18 @@ } define <4 x i64> @vrol_vx_v4i64(<4 x i64> %a, i64 %b) { -; CHECK-RV32-LABEL: vrol_vx_v4i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: li a1, 63 -; 
CHECK-RV32-NEXT: vand.vx v10, v10, a1 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 -; CHECK-RV32-NEXT: vand.vx v12, v12, a1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vrol_vx_v4i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV64-NEXT: vmv.v.x v10, a0 -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vand.vx v12, v10, a0 -; CHECK-RV64-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 -; CHECK-RV64-NEXT: vand.vx v10, v10, a0 -; CHECK-RV64-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV64-NEXT: vor.vv v8, v12, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vrol_vx_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsll.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vrol_vx_v4i64: ; CHECK-ZVBB: # %bb.0: @@ -1118,32 +1056,17 @@ declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) define <8 x i64> @vrol_vv_v8i64(<8 x i64> %a, <8 x i64> %b) { -; CHECK-RV32-LABEL: vrol_vv_v8i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vand.vx v16, v12, a0 -; CHECK-RV32-NEXT: vsll.vv v16, v8, v16 -; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v20, 0 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vv v12, v20, v12 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v16, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vrol_vv_v8i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV64-NEXT: vand.vx v16, v12, a0 -; CHECK-RV64-NEXT: vsll.vv v16, v8, v16 -; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 -; CHECK-RV64-NEXT: vand.vx v12, v12, a0 -; CHECK-RV64-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV64-NEXT: vor.vv v8, v16, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vrol_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vrol_vv_v8i64: ; CHECK-ZVBB: # %bb.0: @@ -1155,34 +1078,18 @@ } define <8 x i64> @vrol_vx_v8i64(<8 x i64> %a, i64 %b) { -; CHECK-RV32-LABEL: vrol_vx_v8i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: li a1, 63 -; CHECK-RV32-NEXT: vand.vx v12, v12, a1 -; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v16, 0 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v16, v16, a0 -; CHECK-RV32-NEXT: vand.vx v16, v16, a1 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: 
vrol_vx_v8i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV64-NEXT: vmv.v.x v12, a0 -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vand.vx v16, v12, a0 -; CHECK-RV64-NEXT: vsll.vv v16, v8, v16 -; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 -; CHECK-RV64-NEXT: vand.vx v12, v12, a0 -; CHECK-RV64-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV64-NEXT: vor.vv v8, v16, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vrol_vx_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsll.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsrl.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vrol_vx_v8i64: ; CHECK-ZVBB: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vror.ll @@ -1558,12 +1558,11 @@ ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vv v11, v10, v9 ; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; CHECK-RV32-NEXT: vmv.s.x v10, a0 -; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vrsub.vi v11, v9, 0 ; CHECK-RV32-NEXT: vand.vv v11, v11, v10 ; CHECK-RV32-NEXT: vsll.vv v11, v8, v11 ; CHECK-RV32-NEXT: vand.vv v9, v9, v10 @@ -1595,19 +1594,20 @@ define <1 x i64> @vror_vx_v1i64(<1 x i64> %a, i64 %b) { ; CHECK-RV32-LABEL: vror_vx_v1i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v9, 0 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v9, a0 +; CHECK-RV32-NEXT: vmv.v.x v9, a0 +; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 0 ; CHECK-RV32-NEXT: li a1, 63 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma -; CHECK-RV32-NEXT: vmv.s.x v9, a1 -; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-RV32-NEXT: vand.vv v10, v10, v9 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v9 -; CHECK-RV32-NEXT: vor.vv v8, v8, v10 +; CHECK-RV32-NEXT: vmv.s.x v10, a1 +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV32-NEXT: vand.vv v9, v9, v10 +; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vx_v1i64: @@ -1639,13 +1639,12 @@ ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-RV32-NEXT: vmv.v.i v9, 0 -; CHECK-RV32-NEXT: li a0, 1 -; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v9, a0 ; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a0 -; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 1 +; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 ; CHECK-RV32-NEXT: vand.vv v10, v10, v9 ; CHECK-RV32-NEXT: 
vsll.vv v10, v8, v10 ; CHECK-RV32-NEXT: vand.vi v9, v9, 1 @@ -1676,13 +1675,12 @@ ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-RV32-NEXT: vmv.v.i v9, 0 -; CHECK-RV32-NEXT: li a0, 1 -; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v9, a0 ; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetvli zero, zero, e32, mf2, tu, ma ; CHECK-RV32-NEXT: vmv.s.x v9, a0 -; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-RV32-NEXT: vmv.v.i v10, 1 +; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 ; CHECK-RV32-NEXT: vand.vv v10, v10, v9 ; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 ; CHECK-RV32-NEXT: vand.vi v9, v9, 1 @@ -1712,32 +1710,17 @@ declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) define <2 x i64> @vror_vv_v2i64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-RV32-LABEL: vror_vv_v2i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vand.vx v10, v9, a0 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v11, 0 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vv v9, v11, v9 -; CHECK-RV32-NEXT: vand.vx v9, v9, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v9 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vror_vv_v2i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV64-NEXT: vand.vx v10, v9, a0 -; CHECK-RV64-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 -; CHECK-RV64-NEXT: vand.vx v9, v9, a0 -; CHECK-RV64-NEXT: vsll.vv v8, v8, v9 -; CHECK-RV64-NEXT: vor.vv v8, v10, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vror_vv_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vand.vx v10, v9, a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_v2i64: ; CHECK-ZVBB: # %bb.0: @@ -1749,34 +1732,18 @@ } define <2 x i64> @vror_vx_v2i64(<2 x i64> %a, i64 %b) { -; CHECK-RV32-LABEL: vror_vx_v2i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v9, a0 -; CHECK-RV32-NEXT: li a1, 63 -; CHECK-RV32-NEXT: vand.vx v9, v9, a1 -; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 -; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a0 -; CHECK-RV32-NEXT: vand.vx v10, v10, a1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v9, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vror_vx_v2i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV64-NEXT: vmv.v.x v9, a0 -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vand.vx v10, v9, a0 -; CHECK-RV64-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV64-NEXT: vrsub.vi v9, v9, 0 -; CHECK-RV64-NEXT: vand.vx v9, v9, a0 -; CHECK-RV64-NEXT: vsll.vv v8, v8, v9 -; CHECK-RV64-NEXT: vor.vv v8, v10, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vror_vx_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vand.vx v10, v9, 
a0 +; CHECK-NEXT: vsrl.vv v10, v8, v10 +; CHECK-NEXT: vrsub.vi v9, v9, 0 +; CHECK-NEXT: vand.vx v9, v9, a0 +; CHECK-NEXT: vsll.vv v8, v8, v9 +; CHECK-NEXT: vor.vv v8, v10, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vx_v2i64: ; CHECK-ZVBB: # %bb.0: @@ -1792,19 +1759,16 @@ define <2 x i64> @vror_vi_v2i64(<2 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v9, a0 -; CHECK-RV32-NEXT: vand.vi v9, v9, 1 -; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 -; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: li a1, 1 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a1 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v9, v8 +; CHECK-RV32-NEXT: vmv.v.i v9, 1 +; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v2i64: @@ -1828,19 +1792,16 @@ define <2 x i64> @vror_vi_rotl_v2i64(<2 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v2i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v9, a0 -; CHECK-RV32-NEXT: vand.vi v9, v9, 1 -; CHECK-RV32-NEXT: vsll.vv v9, v8, v9 -; CHECK-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v10, 0 -; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsub.vx v10, v10, a1 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v9, v8 +; CHECK-RV32-NEXT: vmv.v.i v9, 1 +; CHECK-RV32-NEXT: vrsub.vi v9, v9, 0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v9, v9, a0 +; CHECK-RV32-NEXT: vsrl.vv v9, v8, v9 +; CHECK-RV32-NEXT: vmv.v.x v10, a0 +; CHECK-RV32-NEXT: vand.vi v10, v10, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 +; CHECK-RV32-NEXT: vor.vv v8, v8, v9 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v2i64: @@ -1865,32 +1826,17 @@ declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) define <4 x i64> @vror_vv_v4i64(<4 x i64> %a, <4 x i64> %b) { -; CHECK-RV32-LABEL: vror_vv_v4i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vand.vx v12, v10, a0 -; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v14, 0 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vv v10, v14, v10 -; CHECK-RV32-NEXT: vand.vx v10, v10, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vror_vv_v4i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV64-NEXT: vand.vx v12, v10, a0 -; CHECK-RV64-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 -; CHECK-RV64-NEXT: vand.vx v10, v10, a0 -; CHECK-RV64-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV64-NEXT: vor.vv v8, v12, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vror_vv_v4i64: +; CHECK: # %bb.0: +; 
CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_v4i64: ; CHECK-ZVBB: # %bb.0: @@ -1902,34 +1848,18 @@ } define <4 x i64> @vror_vx_v4i64(<4 x i64> %a, i64 %b) { -; CHECK-RV32-LABEL: vror_vx_v4i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: li a1, 63 -; CHECK-RV32-NEXT: vand.vx v10, v10, a1 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a0 -; CHECK-RV32-NEXT: vand.vx v12, v12, a1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vror_vx_v4i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV64-NEXT: vmv.v.x v10, a0 -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vand.vx v12, v10, a0 -; CHECK-RV64-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV64-NEXT: vrsub.vi v10, v10, 0 -; CHECK-RV64-NEXT: vand.vx v10, v10, a0 -; CHECK-RV64-NEXT: vsll.vv v8, v8, v10 -; CHECK-RV64-NEXT: vor.vv v8, v12, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vror_vx_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vand.vx v12, v10, a0 +; CHECK-NEXT: vsrl.vv v12, v8, v12 +; CHECK-NEXT: vrsub.vi v10, v10, 0 +; CHECK-NEXT: vand.vx v10, v10, a0 +; CHECK-NEXT: vsll.vv v8, v8, v10 +; CHECK-NEXT: vor.vv v8, v12, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vx_v4i64: ; CHECK-ZVBB: # %bb.0: @@ -1945,19 +1875,16 @@ define <4 x i64> @vror_vi_v4i64(<4 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v4i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 -; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: li a1, 1 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a1 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: vmv.v.i v10, 1 +; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v4i64: @@ -1981,19 +1908,16 @@ define <4 x i64> @vror_vi_rotl_v4i64(<4 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v4i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v10, a0 -; CHECK-RV32-NEXT: vand.vi v10, v10, 1 -; CHECK-RV32-NEXT: vsll.vv v10, v8, v10 -; CHECK-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v12, 0 -; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-RV32-NEXT: vsub.vx v12, v12, a1 -; 
CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v10, v8 +; CHECK-RV32-NEXT: vmv.v.i v10, 1 +; CHECK-RV32-NEXT: vrsub.vi v10, v10, 0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v10, v10, a0 +; CHECK-RV32-NEXT: vsrl.vv v10, v8, v10 +; CHECK-RV32-NEXT: vmv.v.x v12, a0 +; CHECK-RV32-NEXT: vand.vi v12, v12, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 +; CHECK-RV32-NEXT: vor.vv v8, v8, v10 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v4i64: @@ -2018,32 +1942,17 @@ declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) define <8 x i64> @vror_vv_v8i64(<8 x i64> %a, <8 x i64> %b) { -; CHECK-RV32-LABEL: vror_vv_v8i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vand.vx v16, v12, a0 -; CHECK-RV32-NEXT: vsrl.vv v16, v8, v16 -; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v20, 0 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vv v12, v20, v12 -; CHECK-RV32-NEXT: vand.vx v12, v12, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV32-NEXT: vor.vv v8, v16, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vror_vv_v8i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV64-NEXT: vand.vx v16, v12, a0 -; CHECK-RV64-NEXT: vsrl.vv v16, v8, v16 -; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 -; CHECK-RV64-NEXT: vand.vx v12, v12, a0 -; CHECK-RV64-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV64-NEXT: vor.vv v8, v16, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vror_vv_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_v8i64: ; CHECK-ZVBB: # %bb.0: @@ -2055,34 +1964,18 @@ } define <8 x i64> @vror_vx_v8i64(<8 x i64> %a, i64 %b) { -; CHECK-RV32-LABEL: vror_vx_v8i64: -; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: li a1, 63 -; CHECK-RV32-NEXT: vand.vx v12, v12, a1 -; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v16, 0 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v16, v16, a0 -; CHECK-RV32-NEXT: vand.vx v16, v16, a1 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 -; CHECK-RV32-NEXT: ret -; -; CHECK-RV64-LABEL: vror_vx_v8i64: -; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV64-NEXT: vmv.v.x v12, a0 -; CHECK-RV64-NEXT: li a0, 63 -; CHECK-RV64-NEXT: vand.vx v16, v12, a0 -; CHECK-RV64-NEXT: vsrl.vv v16, v8, v16 -; CHECK-RV64-NEXT: vrsub.vi v12, v12, 0 -; CHECK-RV64-NEXT: vand.vx v12, v12, a0 -; CHECK-RV64-NEXT: vsll.vv v8, v8, v12 -; CHECK-RV64-NEXT: vor.vv v8, v16, v8 -; CHECK-RV64-NEXT: ret +; CHECK-LABEL: vror_vx_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vand.vx v16, v12, a0 +; CHECK-NEXT: vsrl.vv v16, v8, v16 +; CHECK-NEXT: vrsub.vi v12, v12, 0 +; CHECK-NEXT: vand.vx v12, v12, a0 +; CHECK-NEXT: vsll.vv v8, v8, v12 +; CHECK-NEXT: vor.vv v8, v16, 
v8 +; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vx_v8i64: ; CHECK-ZVBB: # %bb.0: @@ -2098,19 +1991,16 @@ define <8 x i64> @vror_vi_v8i64(<8 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_v8i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 -; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v16, 0 -; CHECK-RV32-NEXT: li a1, 1 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v16, v16, a1 -; CHECK-RV32-NEXT: vand.vx v16, v16, a0 -; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: vmv.v.i v12, 1 +; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 +; CHECK-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-RV32-NEXT: vand.vi v16, v16, 1 +; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v8, v12 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_v8i64: @@ -2134,19 +2024,16 @@ define <8 x i64> @vror_vi_rotl_v8i64(<8 x i64> %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_v8i64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: li a0, 63 -; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.x v12, a0 -; CHECK-RV32-NEXT: vand.vi v12, v12, 1 -; CHECK-RV32-NEXT: vsll.vv v12, v8, v12 -; CHECK-RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-RV32-NEXT: vmv.v.i v16, 0 -; CHECK-RV32-NEXT: li a1, 1 ; CHECK-RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-RV32-NEXT: vsub.vx v16, v16, a1 -; CHECK-RV32-NEXT: vand.vx v16, v16, a0 -; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16 -; CHECK-RV32-NEXT: vor.vv v8, v12, v8 +; CHECK-RV32-NEXT: vmv.v.i v12, 1 +; CHECK-RV32-NEXT: vrsub.vi v12, v12, 0 +; CHECK-RV32-NEXT: li a0, 63 +; CHECK-RV32-NEXT: vand.vx v12, v12, a0 +; CHECK-RV32-NEXT: vsrl.vv v12, v8, v12 +; CHECK-RV32-NEXT: vmv.v.x v16, a0 +; CHECK-RV32-NEXT: vand.vi v16, v16, 1 +; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 +; CHECK-RV32-NEXT: vor.vv v8, v8, v12 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_rotl_v8i64: