diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1370,6 +1370,23 @@ // with 1/-1. static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS, ISD::CondCode &CC, SelectionDAG &DAG) { + // If this is a single bit test that can't be handled by ANDI, shift the + // bit to be tested to the MSB and perform a signed compare with 0. + if (isIntEqualitySetCC(CC) && isNullConstant(RHS) && + LHS.getOpcode() == ISD::AND && LHS.hasOneUse() && + isa<ConstantSDNode>(LHS.getOperand(1))) { + uint64_t Mask = LHS.getConstantOperandVal(1); + if (isPowerOf2_64(Mask) && !isInt<12>(Mask)) { + CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT; + unsigned ShAmt = LHS.getValueSizeInBits() - 1 - Log2_64(Mask); + LHS = LHS.getOperand(0); + if (ShAmt != 0) + LHS = DAG.getNode(ISD::SHL, DL, LHS.getValueType(), LHS, + DAG.getConstant(ShAmt, DL, LHS.getValueType())); + return; + } + } + // Convert X > -1 to X >= 0. if (CC == ISD::SETGT && isAllOnesConstant(RHS)) { RHS = DAG.getConstant(0, DL, RHS.getValueType()); diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll --- a/llvm/test/CodeGen/RISCV/bittest.ll +++ b/llvm/test/CodeGen/RISCV/bittest.ll @@ -436,3 +436,1352 @@ } declare void @bar() + +define signext i32 @bit_10_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; CHECK-LABEL: bit_10_z_select_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a3, a0, 1024 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: beqz a3, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: ret + %1 = and i32 %a, 1024 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_10_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32I-LABEL: bit_10_nz_select_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 21 +; RV32I-NEXT: srli a3, a0, 31 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: bnez a3, .LBB16_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: .LBB16_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_10_nz_select_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 53 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB16_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB16_2: +; RV64I-NEXT: ret +; +; ZBS-LABEL: bit_10_nz_select_i32: +; ZBS: # %bb.0: +; ZBS-NEXT: bexti a3, a0, 10 +; ZBS-NEXT: mv a0, a1 +; ZBS-NEXT: bnez a3, .LBB16_2 +; ZBS-NEXT: # %bb.1: +; ZBS-NEXT: mv a0, a2 +; ZBS-NEXT: .LBB16_2: +; ZBS-NEXT: ret + %1 = and i32 %a, 1024 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_11_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32-LABEL: bit_11_z_select_i32: +; RV32: # %bb.0: +; RV32-NEXT: slli a3, a0, 20 +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: bgez a3, .LBB17_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB17_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_11_z_select_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 52 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB17_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB17_2: +; RV64-NEXT: ret + %1 = and i32 %a, 2048 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_11_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32I-LABEL: bit_11_nz_select_i32:
+; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 20 +; RV32I-NEXT: srli a3, a0, 31 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: bnez a3, .LBB18_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: .LBB18_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_11_nz_select_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 52 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB18_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB18_2: +; RV64I-NEXT: ret +; +; ZBS-LABEL: bit_11_nz_select_i32: +; ZBS: # %bb.0: +; ZBS-NEXT: bexti a3, a0, 11 +; ZBS-NEXT: mv a0, a1 +; ZBS-NEXT: bnez a3, .LBB18_2 +; ZBS-NEXT: # %bb.1: +; ZBS-NEXT: mv a0, a2 +; ZBS-NEXT: .LBB18_2: +; ZBS-NEXT: ret + %1 = and i32 %a, 2048 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_20_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32-LABEL: bit_20_z_select_i32: +; RV32: # %bb.0: +; RV32-NEXT: slli a3, a0, 11 +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: bgez a3, .LBB19_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB19_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_20_z_select_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 43 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB19_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB19_2: +; RV64-NEXT: ret + %1 = and i32 %a, 1048576 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_20_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32I-LABEL: bit_20_nz_select_i32: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 11 +; RV32I-NEXT: srli a3, a0, 31 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: bnez a3, .LBB20_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: .LBB20_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_20_nz_select_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 43 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB20_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB20_2: +; RV64I-NEXT: ret +; +; ZBS-LABEL: bit_20_nz_select_i32: +; ZBS: # %bb.0: +; ZBS-NEXT: bexti a3, a0, 20 +; ZBS-NEXT: mv a0, a1 +; ZBS-NEXT: bnez a3, .LBB20_2 +; ZBS-NEXT: # %bb.1: +; ZBS-NEXT: mv a0, a2 +; ZBS-NEXT: .LBB20_2: +; ZBS-NEXT: ret + %1 = and i32 %a, 1048576 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_31_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32-LABEL: bit_31_z_select_i32: +; RV32: # %bb.0: +; RV32-NEXT: bgez a0, .LBB21_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a1, a2 +; RV32-NEXT: .LBB21_2: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_z_select_i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a3, 524288 +; RV64-NEXT: and a3, a0, a3 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: beqz a3, .LBB21_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB21_2: +; RV64-NEXT: ret + %1 = and i32 %a, 2147483648 + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define signext i32 @bit_31_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { +; RV32-LABEL: bit_31_nz_select_i32: +; RV32: # %bb.0: +; RV32-NEXT: srli a3, a0, 31 +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: bnez a3, .LBB22_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: .LBB22_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_nz_select_i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a3, 524288 +; RV64-NEXT: and a3, a0, a3 
+; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bnez a3, .LBB22_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB22_2: +; RV64-NEXT: ret + %1 = and i32 %a, 2147483648 + %2 = icmp ne i32 %1, 0 + %3 = select i1 %2, i32 %b, i32 %c + ret i32 %3 +} + +define i64 @bit_10_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_10_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: andi a6, a0, 1024 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beqz a6, .LBB23_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB23_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_10_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: andi a3, a0, 1024 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: beqz a3, .LBB23_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB23_2: +; RV64-NEXT: ret + %1 = and i64 %a, 1024 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32I-LABEL: bit_10_nz_select_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 21 +; RV32I-NEXT: srli a6, a0, 31 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: bnez a6, .LBB24_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_10_nz_select_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 53 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB24_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB24_2: +; RV64I-NEXT: ret +; +; RV32ZBS-LABEL: bit_10_nz_select_i64: +; RV32ZBS: # %bb.0: +; RV32ZBS-NEXT: bexti a6, a0, 10 +; RV32ZBS-NEXT: mv a1, a3 +; RV32ZBS-NEXT: mv a0, a2 +; RV32ZBS-NEXT: bnez a6, .LBB24_2 +; RV32ZBS-NEXT: # %bb.1: +; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a1, a5 +; RV32ZBS-NEXT: .LBB24_2: +; RV32ZBS-NEXT: ret +; +; RV64ZBS-LABEL: bit_10_nz_select_i64: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bexti a3, a0, 10 +; RV64ZBS-NEXT: mv a0, a1 +; RV64ZBS-NEXT: bnez a3, .LBB24_2 +; RV64ZBS-NEXT: # %bb.1: +; RV64ZBS-NEXT: mv a0, a2 +; RV64ZBS-NEXT: .LBB24_2: +; RV64ZBS-NEXT: ret + %1 = and i64 %a, 1024 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_11_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_11_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a6, a0, 20 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgez a6, .LBB25_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB25_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_11_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 52 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB25_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB25_2: +; RV64-NEXT: ret + %1 = and i64 %a, 2048 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32I-LABEL: bit_11_nz_select_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 20 +; RV32I-NEXT: srli a6, a0, 31 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: bnez a6, .LBB26_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: .LBB26_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_11_nz_select_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 52 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB26_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; 
RV64I-NEXT: .LBB26_2: +; RV64I-NEXT: ret +; +; RV32ZBS-LABEL: bit_11_nz_select_i64: +; RV32ZBS: # %bb.0: +; RV32ZBS-NEXT: bexti a6, a0, 11 +; RV32ZBS-NEXT: mv a1, a3 +; RV32ZBS-NEXT: mv a0, a2 +; RV32ZBS-NEXT: bnez a6, .LBB26_2 +; RV32ZBS-NEXT: # %bb.1: +; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a1, a5 +; RV32ZBS-NEXT: .LBB26_2: +; RV32ZBS-NEXT: ret +; +; RV64ZBS-LABEL: bit_11_nz_select_i64: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bexti a3, a0, 11 +; RV64ZBS-NEXT: mv a0, a1 +; RV64ZBS-NEXT: bnez a3, .LBB26_2 +; RV64ZBS-NEXT: # %bb.1: +; RV64ZBS-NEXT: mv a0, a2 +; RV64ZBS-NEXT: .LBB26_2: +; RV64ZBS-NEXT: ret + %1 = and i64 %a, 2048 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_20_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_20_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a6, a0, 11 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgez a6, .LBB27_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB27_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_20_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 43 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB27_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB27_2: +; RV64-NEXT: ret + %1 = and i64 %a, 1048576 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32I-LABEL: bit_20_nz_select_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 11 +; RV32I-NEXT: srli a6, a0, 31 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: bnez a6, .LBB28_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: .LBB28_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_20_nz_select_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 43 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB28_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB28_2: +; RV64I-NEXT: ret +; +; RV32ZBS-LABEL: bit_20_nz_select_i64: +; RV32ZBS: # %bb.0: +; RV32ZBS-NEXT: bexti a6, a0, 20 +; RV32ZBS-NEXT: mv a1, a3 +; RV32ZBS-NEXT: mv a0, a2 +; RV32ZBS-NEXT: bnez a6, .LBB28_2 +; RV32ZBS-NEXT: # %bb.1: +; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a1, a5 +; RV32ZBS-NEXT: .LBB28_2: +; RV32ZBS-NEXT: ret +; +; RV64ZBS-LABEL: bit_20_nz_select_i64: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bexti a3, a0, 20 +; RV64ZBS-NEXT: mv a0, a1 +; RV64ZBS-NEXT: bnez a3, .LBB28_2 +; RV64ZBS-NEXT: # %bb.1: +; RV64ZBS-NEXT: mv a0, a2 +; RV64ZBS-NEXT: .LBB28_2: +; RV64ZBS-NEXT: ret + %1 = and i64 %a, 1048576 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_31_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_31_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: bgez a0, .LBB29_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB29_2: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 32 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB29_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB29_2: +; RV64-NEXT: ret + %1 = and i64 %a, 2147483648 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_31_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_31_nz_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: srli a6, a0, 31 +; RV32-NEXT: mv a1, a3 +; 
RV32-NEXT: mv a0, a2 +; RV32-NEXT: bnez a6, .LBB30_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB30_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_nz_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: srliw a3, a0, 31 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bnez a3, .LBB30_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB30_2: +; RV64-NEXT: ret + %1 = and i64 %a, 2147483648 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_32_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: andi a6, a1, 1 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: beqz a6, .LBB31_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB31_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_32_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 31 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB31_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB31_2: +; RV64-NEXT: ret + %1 = and i64 %a, 4294967296 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_32_nz_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: andi a6, a1, 1 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bnez a6, .LBB32_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB32_2: +; RV32-NEXT: ret +; +; RV64I-LABEL: bit_32_nz_select_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 31 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB32_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB32_2: +; RV64I-NEXT: ret +; +; RV64ZBS-LABEL: bit_32_nz_select_i64: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bexti a3, a0, 32 +; RV64ZBS-NEXT: mv a0, a1 +; RV64ZBS-NEXT: bnez a3, .LBB32_2 +; RV64ZBS-NEXT: # %bb.1: +; RV64ZBS-NEXT: mv a0, a2 +; RV64ZBS-NEXT: .LBB32_2: +; RV64ZBS-NEXT: ret + %1 = and i64 %a, 4294967296 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_55_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_55_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a6, a1, 8 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgez a6, .LBB33_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB33_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_55_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a3, a0, 8 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bgez a3, .LBB33_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB33_2: +; RV64-NEXT: ret + %1 = and i64 %a, 36028797018963968 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32I-LABEL: bit_55_nz_select_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a1, 8 +; RV32I-NEXT: srli a6, a0, 31 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: bnez a6, .LBB34_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: .LBB34_2: +; RV32I-NEXT: ret +; +; RV64I-LABEL: bit_55_nz_select_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: srli a3, a0, 63 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: bnez a3, .LBB34_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: .LBB34_2: +; RV64I-NEXT: ret +; +; RV32ZBS-LABEL: bit_55_nz_select_i64: +; RV32ZBS: # 
%bb.0: +; RV32ZBS-NEXT: bexti a6, a1, 23 +; RV32ZBS-NEXT: mv a1, a3 +; RV32ZBS-NEXT: mv a0, a2 +; RV32ZBS-NEXT: bnez a6, .LBB34_2 +; RV32ZBS-NEXT: # %bb.1: +; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a1, a5 +; RV32ZBS-NEXT: .LBB34_2: +; RV32ZBS-NEXT: ret +; +; RV64ZBS-LABEL: bit_55_nz_select_i64: +; RV64ZBS: # %bb.0: +; RV64ZBS-NEXT: bexti a3, a0, 55 +; RV64ZBS-NEXT: mv a0, a1 +; RV64ZBS-NEXT: bnez a3, .LBB34_2 +; RV64ZBS-NEXT: # %bb.1: +; RV64ZBS-NEXT: mv a0, a2 +; RV64ZBS-NEXT: .LBB34_2: +; RV64ZBS-NEXT: ret + %1 = and i64 %a, 36028797018963968 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_63_z_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_63_z_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bgez a1, .LBB35_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a3, a5 +; RV32-NEXT: .LBB35_2: +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: bit_63_z_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: bgez a0, .LBB35_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a1, a2 +; RV64-NEXT: .LBB35_2: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: ret + %1 = and i64 %a, 9223372036854775808 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define i64 @bit_63_nz_select_i64(i64 %a, i64 %b, i64 %c) { +; RV32-LABEL: bit_63_nz_select_i64: +; RV32: # %bb.0: +; RV32-NEXT: srli a6, a1, 31 +; RV32-NEXT: mv a1, a3 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: bnez a6, .LBB36_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a1, a5 +; RV32-NEXT: .LBB36_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_63_nz_select_i64: +; RV64: # %bb.0: +; RV64-NEXT: srli a3, a0, 63 +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: bnez a3, .LBB36_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a2 +; RV64-NEXT: .LBB36_2: +; RV64-NEXT: ret + %1 = and i64 %a, 9223372036854775808 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, i64 %b, i64 %c + ret i64 %3 +} + +define void @bit_10_z_branch_i32(i32 signext %0) { +; CHECK-LABEL: bit_10_z_branch_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 1024 +; CHECK-NEXT: bnez a0, .LBB37_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tail bar@plt +; CHECK-NEXT: .LBB37_2: +; CHECK-NEXT: ret + %2 = and i32 %0, 1024 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_nz_branch_i32(i32 signext %0) { +; CHECK-LABEL: bit_10_nz_branch_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 1024 +; CHECK-NEXT: beqz a0, .LBB38_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tail bar@plt +; CHECK-NEXT: .LBB38_2: +; CHECK-NEXT: ret + %2 = and i32 %0, 1024 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_z_branch_i32(i32 signext %0) { +; RV32-LABEL: bit_11_z_branch_i32: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: bltz a0, .LBB39_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB39_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_11_z_branch_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bltz a0, .LBB39_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB39_2: +; RV64-NEXT: ret + %2 = and i32 %0, 2048 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_nz_branch_i32(i32 signext %0) { +; RV32-LABEL: bit_11_nz_branch_i32: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: 
bgez a0, .LBB40_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB40_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_11_nz_branch_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bgez a0, .LBB40_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB40_2: +; RV64-NEXT: ret + %2 = and i32 %0, 2048 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_z_branch_i32(i32 signext %0) { +; RV32-LABEL: bit_24_z_branch_i32: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: bltz a0, .LBB41_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB41_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_24_z_branch_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: bltz a0, .LBB41_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB41_2: +; RV64-NEXT: ret + %2 = and i32 %0, 16777216 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_nz_branch_i32(i32 signext %0) { +; RV32-LABEL: bit_24_nz_branch_i32: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: bgez a0, .LBB42_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB42_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_24_nz_branch_i32: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: bgez a0, .LBB42_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB42_2: +; RV64-NEXT: ret + %2 = and i32 %0, 16777216 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_z_branch_i32(i32 signext %0) { +; RV32-LABEL: bit_31_z_branch_i32: +; RV32: # %bb.0: +; RV32-NEXT: bltz a0, .LBB43_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB43_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_z_branch_i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 524288 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: bnez a0, .LBB43_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB43_2: +; RV64-NEXT: ret + %2 = and i32 %0, 2147483648 + %3 = icmp eq i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_nz_branch_i32(i32 signext %0) { +; RV32-LABEL: bit_31_nz_branch_i32: +; RV32: # %bb.0: +; RV32-NEXT: bgez a0, .LBB44_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB44_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_nz_branch_i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 524288 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: beqz a0, .LBB44_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB44_2: +; RV64-NEXT: ret + %2 = and i32 %0, 2147483648 + %3 = icmp ne i32 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_z_branch_i64(i64 %0) { +; CHECK-LABEL: bit_10_z_branch_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 1024 +; CHECK-NEXT: bnez a0, .LBB45_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: tail bar@plt +; CHECK-NEXT: .LBB45_2: +; CHECK-NEXT: ret + %2 = and i64 %0, 1024 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_10_nz_branch_i64(i64 %0) { +; CHECK-LABEL: bit_10_nz_branch_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a0, a0, 1024 +; CHECK-NEXT: beqz a0, .LBB46_2 +; CHECK-NEXT: # 
%bb.1: +; CHECK-NEXT: tail bar@plt +; CHECK-NEXT: .LBB46_2: +; CHECK-NEXT: ret + %2 = and i64 %0, 1024 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_z_branch_i64(i64 %0) { +; RV32-LABEL: bit_11_z_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: bltz a0, .LBB47_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB47_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_11_z_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bltz a0, .LBB47_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB47_2: +; RV64-NEXT: ret + %2 = and i64 %0, 2048 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_11_nz_branch_i64(i64 %0) { +; RV32-LABEL: bit_11_nz_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: bgez a0, .LBB48_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB48_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_11_nz_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bgez a0, .LBB48_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB48_2: +; RV64-NEXT: ret + %2 = and i64 %0, 2048 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_z_branch_i64(i64 %0) { +; RV32-LABEL: bit_24_z_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: bltz a0, .LBB49_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB49_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_24_z_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: bltz a0, .LBB49_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB49_2: +; RV64-NEXT: ret + %2 = and i64 %0, 16777216 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_24_nz_branch_i64(i64 %0) { +; RV32-LABEL: bit_24_nz_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a0, 7 +; RV32-NEXT: bgez a0, .LBB50_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB50_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_24_nz_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 39 +; RV64-NEXT: bgez a0, .LBB50_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB50_2: +; RV64-NEXT: ret + %2 = and i64 %0, 16777216 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_z_branch_i64(i64 %0) { +; RV32-LABEL: bit_31_z_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: bltz a0, .LBB51_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB51_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_z_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: bltz a0, .LBB51_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB51_2: +; RV64-NEXT: ret + %2 = and i64 %0, 2147483648 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_31_nz_branch_i64(i64 %0) { +; RV32-LABEL: bit_31_nz_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: bgez a0, .LBB52_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB52_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_31_nz_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: 
slli a0, a0, 32 +; RV64-NEXT: bgez a0, .LBB52_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB52_2: +; RV64-NEXT: ret + %2 = and i64 %0, 2147483648 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_z_branch_i64(i64 %0) { +; RV32-LABEL: bit_32_z_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: andi a0, a1, 1 +; RV32-NEXT: bnez a0, .LBB53_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB53_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_32_z_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: bltz a0, .LBB53_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB53_2: +; RV64-NEXT: ret + %2 = and i64 %0, 4294967296 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_32_nz_branch_i64(i64 %0) { +; RV32-LABEL: bit_32_nz_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: andi a0, a1, 1 +; RV32-NEXT: beqz a0, .LBB54_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB54_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_32_nz_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: bgez a0, .LBB54_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB54_2: +; RV64-NEXT: ret + %2 = and i64 %0, 4294967296 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_62_z_branch_i64(i64 %0) { +; RV32-LABEL: bit_62_z_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a1, 1 +; RV32-NEXT: bltz a0, .LBB55_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB55_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_62_z_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: bltz a0, .LBB55_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB55_2: +; RV64-NEXT: ret + %2 = and i64 %0, 4611686018427387904 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_62_nz_branch_i64(i64 %0) { +; RV32-LABEL: bit_62_nz_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: slli a0, a1, 1 +; RV32-NEXT: bgez a0, .LBB56_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB56_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_62_nz_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: bgez a0, .LBB56_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB56_2: +; RV64-NEXT: ret + %2 = and i64 %0, 4611686018427387904 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_63_z_branch_i64(i64 %0) { +; RV32-LABEL: bit_63_z_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: bltz a1, .LBB57_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB57_2: +; RV32-NEXT: ret +; +; RV64-LABEL: bit_63_z_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: bltz a0, .LBB57_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB57_2: +; RV64-NEXT: ret + %2 = and i64 %0, 9223372036854775808 + %3 = icmp eq i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} + +define void @bit_63_nz_branch_i64(i64 %0) { +; RV32-LABEL: bit_63_nz_branch_i64: +; RV32: # %bb.0: +; RV32-NEXT: bgez a1, .LBB58_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: tail bar@plt +; RV32-NEXT: .LBB58_2: +; RV32-NEXT: 
ret +; +; RV64-LABEL: bit_63_nz_branch_i64: +; RV64: # %bb.0: +; RV64-NEXT: bgez a0, .LBB58_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: tail bar@plt +; RV64-NEXT: .LBB58_2: +; RV64-NEXT: ret + %2 = and i64 %0, 9223372036854775808 + %3 = icmp ne i64 %2, 0 + br i1 %3, label %4, label %5 + +4: + tail call void @bar() + br label %5 + +5: + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12458,10 +12458,10 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_28 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_26 ; RV64ZVE32F-NEXT: # %bb.7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_29 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_27 ; RV64ZVE32F-NEXT: .LBB97_8: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_10 @@ -12480,13 +12480,13 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_30 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_28 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_31 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_29 ; RV64ZVE32F-NEXT: .LBB97_12: # %else20 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: bnez a2, .LBB97_32 +; RV64ZVE32F-NEXT: bnez a2, .LBB97_30 ; RV64ZVE32F-NEXT: .LBB97_13: # %else23 ; RV64ZVE32F-NEXT: andi a2, a1, 512 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_15 @@ -12513,37 +12513,16 @@ ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10 ; RV64ZVE32F-NEXT: .LBB97_17: # %else29 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu -; RV64ZVE32F-NEXT: lui a2, 1 -; RV64ZVE32F-NEXT: addiw a3, a2, -2048 -; RV64ZVE32F-NEXT: and a3, a1, a3 +; RV64ZVE32F-NEXT: slli a2, a1, 52 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 -; RV64ZVE32F-NEXT: beqz a3, .LBB97_19 -; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v10 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lb a3, 0(a3) -; RV64ZVE32F-NEXT: vmv.s.x v10, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11 -; RV64ZVE32F-NEXT: .LBB97_19: # %else32 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_21 -; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12 -; RV64ZVE32F-NEXT: .LBB97_21: # %else35 -; RV64ZVE32F-NEXT: lui a2, 2 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_23 -; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37 +; RV64ZVE32F-NEXT: bltz a2, .LBB97_31 +; RV64ZVE32F-NEXT: # %bb.18: # %else32 +; RV64ZVE32F-NEXT: slli a2, a1, 51 +; RV64ZVE32F-NEXT: bltz a2, .LBB97_32 +; RV64ZVE32F-NEXT: .LBB97_19: # %else35 +; RV64ZVE32F-NEXT: slli a2, a1, 50 +; RV64ZVE32F-NEXT: bgez a2, .LBB97_21 +; RV64ZVE32F-NEXT: .LBB97_20: # %cond.load37 ; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -12552,24 +12531,23 @@ ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 13 -; RV64ZVE32F-NEXT: .LBB97_23: # %else38 +; RV64ZVE32F-NEXT: .LBB97_21: # %else38 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: lui a2, 4 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB97_25 -; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40 +; RV64ZVE32F-NEXT: bgez a2, .LBB97_23 +; RV64ZVE32F-NEXT: # %bb.22: # %cond.load40 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14 -; RV64ZVE32F-NEXT: .LBB97_25: # %else41 +; RV64ZVE32F-NEXT: .LBB97_23: # %else41 ; RV64ZVE32F-NEXT: lui a2, 1048568 ; RV64ZVE32F-NEXT: and a1, a1, a2 -; RV64ZVE32F-NEXT: beqz a1, .LBB97_27 -; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43 +; RV64ZVE32F-NEXT: beqz a1, .LBB97_25 +; RV64ZVE32F-NEXT: # %bb.24: # %cond.load43 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 @@ -12578,10 +12556,10 @@ ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15 -; RV64ZVE32F-NEXT: .LBB97_27: # %else44 +; RV64ZVE32F-NEXT: .LBB97_25: # %else44 ; RV64ZVE32F-NEXT: vmv1r.v v8, v9 ; RV64ZVE32F-NEXT: ret -; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load7 +; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 @@ -12592,7 +12570,7 @@ ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_8 -; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load10 +; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12604,7 +12582,7 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_9 ; RV64ZVE32F-NEXT: j .LBB97_10 -; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load16 +; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) @@ -12613,7 +12591,7 @@ ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6 ; RV64ZVE32F-NEXT: andi a2, a1, 128 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_12 -; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load19 +; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 @@ -12624,7 +12602,7 @@ ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7 ; RV64ZVE32F-NEXT: andi a2, a1, 256 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_13 -; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load22 +; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load22 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12636,6 +12614,29 @@ ; RV64ZVE32F-NEXT: andi a2, a1, 512 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_14 ; RV64ZVE32F-NEXT: j .LBB97_15 +; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31 +; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu +; 
RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, mu +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11 +; RV64ZVE32F-NEXT: slli a2, a1, 51 +; RV64ZVE32F-NEXT: bgez a2, .LBB97_19 +; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34 +; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, mu +; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 12 +; RV64ZVE32F-NEXT: slli a2, a1, 50 +; RV64ZVE32F-NEXT: bltz a2, .LBB97_20 +; RV64ZVE32F-NEXT: j .LBB97_21 %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs %v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru) ret <16 x i8> %v @@ -12722,10 +12723,10 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu ; RV64ZVE32F-NEXT: andi a2, a1, 8 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_60 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_50 ; RV64ZVE32F-NEXT: # %bb.7: # %else8 ; RV64ZVE32F-NEXT: andi a2, a1, 16 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_61 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_51 ; RV64ZVE32F-NEXT: .LBB98_8: # %else11 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_10 @@ -12746,13 +12747,13 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_62 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_52 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a1, 128 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_63 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_53 ; RV64ZVE32F-NEXT: .LBB98_12: # %else20 ; RV64ZVE32F-NEXT: andi a2, a1, 256 -; RV64ZVE32F-NEXT: bnez a2, .LBB98_64 +; RV64ZVE32F-NEXT: bnez a2, .LBB98_54 ; RV64ZVE32F-NEXT: .LBB98_13: # %else23 ; RV64ZVE32F-NEXT: andi a2, a1, 512 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_15 @@ -12783,27 +12784,25 @@ ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 ; RV64ZVE32F-NEXT: .LBB98_17: # %else29 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu -; RV64ZVE32F-NEXT: lui a2, 1 -; RV64ZVE32F-NEXT: addiw a3, a2, -2048 -; RV64ZVE32F-NEXT: and a3, a1, a3 +; RV64ZVE32F-NEXT: slli a2, a1, 52 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4 -; RV64ZVE32F-NEXT: beqz a3, .LBB98_19 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_19 ; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v13 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lb a3, 0(a3) -; RV64ZVE32F-NEXT: li a4, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a4, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v14, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v13 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: li a3, 32 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu +; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16 -; RV64ZVE32F-NEXT: beqz a2, 
.LBB98_21 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -12814,9 +12813,8 @@ ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12 ; RV64ZVE32F-NEXT: .LBB98_21: # %else35 -; RV64ZVE32F-NEXT: lui a2, 2 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_23 +; RV64ZVE32F-NEXT: slli a2, a1, 50 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_23 ; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 @@ -12830,53 +12828,19 @@ ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: lui a2, 4 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_25 -; RV64ZVE32F-NEXT: # %bb.24: # %cond.load40 -; RV64ZVE32F-NEXT: vmv.x.s a2, v12 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14 -; RV64ZVE32F-NEXT: .LBB98_25: # %else41 -; RV64ZVE32F-NEXT: lui a2, 8 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_27 -; RV64ZVE32F-NEXT: # %bb.26: # %cond.load43 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v12 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15 -; RV64ZVE32F-NEXT: .LBB98_27: # %else44 -; RV64ZVE32F-NEXT: lui a2, 16 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_29 -; RV64ZVE32F-NEXT: # %bb.28: # %cond.load46 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16 -; RV64ZVE32F-NEXT: .LBB98_29: # %else47 -; RV64ZVE32F-NEXT: lui a2, 32 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_31 -; RV64ZVE32F-NEXT: # %bb.30: # %cond.load49 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_55 +; RV64ZVE32F-NEXT: # %bb.24: # %else41 +; RV64ZVE32F-NEXT: slli a2, a1, 48 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_56 +; RV64ZVE32F-NEXT: .LBB98_25: # %else44 +; RV64ZVE32F-NEXT: slli a2, a1, 47 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_57 +; RV64ZVE32F-NEXT: .LBB98_26: # %else47 +; RV64ZVE32F-NEXT: slli a2, a1, 46 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_28 +; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -12887,13 +12851,12 @@ ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17 -; RV64ZVE32F-NEXT: .LBB98_31: # %else50 +; RV64ZVE32F-NEXT: .LBB98_28: # %else50 ; RV64ZVE32F-NEXT: 
vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: lui a2, 64 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_33 -; RV64ZVE32F-NEXT: # %bb.32: # %cond.load52 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_30 +; RV64ZVE32F-NEXT: # %bb.29: # %cond.load52 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) @@ -12902,42 +12865,18 @@ ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18 -; RV64ZVE32F-NEXT: .LBB98_33: # %else53 +; RV64ZVE32F-NEXT: .LBB98_30: # %else53 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu -; RV64ZVE32F-NEXT: lui a2, 128 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 44 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_35 -; RV64ZVE32F-NEXT: # %bb.34: # %cond.load55 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v13 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19 -; RV64ZVE32F-NEXT: .LBB98_35: # %else56 -; RV64ZVE32F-NEXT: lui a2, 256 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_37 -; RV64ZVE32F-NEXT: # %bb.36: # %cond.load58 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vmv.x.s a2, v12 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20 -; RV64ZVE32F-NEXT: .LBB98_37: # %else59 -; RV64ZVE32F-NEXT: lui a2, 512 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_39 -; RV64ZVE32F-NEXT: # %bb.38: # %cond.load61 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_58 +; RV64ZVE32F-NEXT: # %bb.31: # %else56 +; RV64ZVE32F-NEXT: slli a2, a1, 43 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_59 +; RV64ZVE32F-NEXT: .LBB98_32: # %else59 +; RV64ZVE32F-NEXT: slli a2, a1, 42 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_34 +; RV64ZVE32F-NEXT: .LBB98_33: # %cond.load61 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -12948,57 +12887,23 @@ ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 21 -; RV64ZVE32F-NEXT: .LBB98_39: # %else62 +; RV64ZVE32F-NEXT: .LBB98_34: # %else62 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: lui a2, 1024 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_41 -; RV64ZVE32F-NEXT: # %bb.40: # %cond.load64 -; RV64ZVE32F-NEXT: vmv.x.s a2, v9 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22 -; 
RV64ZVE32F-NEXT: .LBB98_41: # %else65 -; RV64ZVE32F-NEXT: lui a2, 2048 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_43 -; RV64ZVE32F-NEXT: # %bb.42: # %cond.load67 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v9 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23 -; RV64ZVE32F-NEXT: .LBB98_43: # %else68 -; RV64ZVE32F-NEXT: lui a2, 4096 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_45 -; RV64ZVE32F-NEXT: # %bb.44: # %cond.load70 -; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, mu -; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24 -; RV64ZVE32F-NEXT: .LBB98_45: # %else71 -; RV64ZVE32F-NEXT: lui a2, 8192 -; RV64ZVE32F-NEXT: and a2, a1, a2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_47 -; RV64ZVE32F-NEXT: # %bb.46: # %cond.load73 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_60 +; RV64ZVE32F-NEXT: # %bb.35: # %else65 +; RV64ZVE32F-NEXT: slli a2, a1, 40 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_61 +; RV64ZVE32F-NEXT: .LBB98_36: # %else68 +; RV64ZVE32F-NEXT: slli a2, a1, 39 +; RV64ZVE32F-NEXT: bltz a2, .LBB98_62 +; RV64ZVE32F-NEXT: .LBB98_37: # %else71 +; RV64ZVE32F-NEXT: slli a2, a1, 38 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_39 +; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -13009,13 +12914,12 @@ ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25 -; RV64ZVE32F-NEXT: .LBB98_47: # %else74 +; RV64ZVE32F-NEXT: .LBB98_39: # %else74 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: lui a2, 16384 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 37 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 2 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_49 -; RV64ZVE32F-NEXT: # %bb.48: # %cond.load76 +; RV64ZVE32F-NEXT: bgez a2, .LBB98_41 +; RV64ZVE32F-NEXT: # %bb.40: # %cond.load76 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) @@ -13024,42 +12928,18 @@ ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, mu ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26 -; RV64ZVE32F-NEXT: .LBB98_49: # %else77 +; RV64ZVE32F-NEXT: .LBB98_41: # %else77 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu -; RV64ZVE32F-NEXT: lui a2, 32768 -; RV64ZVE32F-NEXT: and a2, a1, a2 +; RV64ZVE32F-NEXT: slli a2, a1, 36 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 4 -; RV64ZVE32F-NEXT: beqz a2, .LBB98_51 -; RV64ZVE32F-NEXT: # %bb.50: # %cond.load79 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu -; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v9 -; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, mu -; 
RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
-; RV64ZVE32F-NEXT: .LBB98_51: # %else80
-; RV64ZVE32F-NEXT: lui a2, 65536
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_53
-; RV64ZVE32F-NEXT: # %bb.52: # %cond.load82
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: lb a2, 0(a2)
-; RV64ZVE32F-NEXT: li a3, 32
-; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vmv.s.x v12, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, mu
-; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
-; RV64ZVE32F-NEXT: .LBB98_53: # %else83
-; RV64ZVE32F-NEXT: lui a2, 131072
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_55
-; RV64ZVE32F-NEXT: # %bb.54: # %cond.load85
+; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
+; RV64ZVE32F-NEXT: # %bb.42: # %else80
+; RV64ZVE32F-NEXT: slli a2, a1, 35
+; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
+; RV64ZVE32F-NEXT: .LBB98_43: # %else83
+; RV64ZVE32F-NEXT: slli a2, a1, 34
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_45
+; RV64ZVE32F-NEXT: .LBB98_44: # %cond.load85
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
@@ -13070,13 +12950,12 @@
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
-; RV64ZVE32F-NEXT: .LBB98_55: # %else86
+; RV64ZVE32F-NEXT: .LBB98_45: # %else86
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 262144
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB98_57
-; RV64ZVE32F-NEXT: # %bb.56: # %cond.load88
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_47
+; RV64ZVE32F-NEXT: # %bb.46: # %cond.load88
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@@ -13085,11 +12964,11 @@
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
-; RV64ZVE32F-NEXT: .LBB98_57: # %else89
+; RV64ZVE32F-NEXT: .LBB98_47: # %else89
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB98_59
-; RV64ZVE32F-NEXT: # %bb.58: # %cond.load91
+; RV64ZVE32F-NEXT: beqz a1, .LBB98_49
+; RV64ZVE32F-NEXT: # %bb.48: # %cond.load91
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
@@ -13100,10 +12979,10 @@
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m2, tu, mu
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
-; RV64ZVE32F-NEXT: .LBB98_59: # %else92
+; RV64ZVE32F-NEXT: .LBB98_49: # %else92
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load7
+; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -13116,7 +12995,7 @@
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
-; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load10
+; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13129,7 +13008,7 @@
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB98_9
; RV64ZVE32F-NEXT: j .LBB98_10
-; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load16
+; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lb a2, 0(a2)
@@ -13140,7 +13019,7 @@
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
-; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load19
+; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -13153,7 +13032,7 @@
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
-; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load22
+; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -13166,6 +13045,132 @@
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
; RV64ZVE32F-NEXT: j .LBB98_15
+; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load40
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v14, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14
+; RV64ZVE32F-NEXT: slli a2, a1, 48
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
+; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load43
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15
+; RV64ZVE32F-NEXT: slli a2, a1, 47
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
+; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load46
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
+; RV64ZVE32F-NEXT: slli a2, a1, 46
+; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
+; RV64ZVE32F-NEXT: j .LBB98_28
+; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v13
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v14, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19
+; RV64ZVE32F-NEXT: slli a2, a1, 43
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
+; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load58
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v14, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20
+; RV64ZVE32F-NEXT: slli a2, a1, 42
+; RV64ZVE32F-NEXT: bltz a2, .LBB98_33
+; RV64ZVE32F-NEXT: j .LBB98_34
+; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load64
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
+; RV64ZVE32F-NEXT: slli a2, a1, 40
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
+; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load67
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
+; RV64ZVE32F-NEXT: slli a2, a1, 39
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
+; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load70
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
+; RV64ZVE32F-NEXT: slli a2, a1, 38
+; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
+; RV64ZVE32F-NEXT: j .LBB98_39
+; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
+; RV64ZVE32F-NEXT: slli a2, a1, 35
+; RV64ZVE32F-NEXT: bgez a2, .LBB98_43
+; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: lb a2, 0(a2)
+; RV64ZVE32F-NEXT: li a3, 32
+; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vmv.s.x v12, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, mu
+; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
+; RV64ZVE32F-NEXT: slli a2, a1, 34
+; RV64ZVE32F-NEXT: bltz a2, .LBB98_44
+; RV64ZVE32F-NEXT: j .LBB98_45
 %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
 %v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
 ret <32 x i8> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -10842,10 +10842,10 @@
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_26
; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_27
; RV64ZVE32F-NEXT: .LBB91_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB91_10
@@ -10863,13 +10863,13 @@
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_28
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_31
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_29
; RV64ZVE32F-NEXT: .LBB91_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB91_32
+; RV64ZVE32F-NEXT: bnez a2, .LBB91_30
; RV64ZVE32F-NEXT: .LBB91_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB91_15
@@ -10894,34 +10894,16 @@
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: .LBB91_17: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 1
-; RV64ZVE32F-NEXT: addiw a3, a2, -2048
-; RV64ZVE32F-NEXT: and a3, a1, a3
+; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB91_19
-; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
-; RV64ZVE32F-NEXT: vse8.v v10, (a3)
-; RV64ZVE32F-NEXT: .LBB91_19: # %else22
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_21
-; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a2, v9
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
-; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB91_21: # %else24
-; RV64ZVE32F-NEXT: lui a2, 2
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_23
-; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
+; RV64ZVE32F-NEXT: bltz a2, .LBB91_31
+; RV64ZVE32F-NEXT: # %bb.18: # %else22
+; RV64ZVE32F-NEXT: slli a2, a1, 51
+; RV64ZVE32F-NEXT: bltz a2, .LBB91_32
+; RV64ZVE32F-NEXT: .LBB91_19: # %else24
+; RV64ZVE32F-NEXT: slli a2, a1, 50
+; RV64ZVE32F-NEXT: bgez a2, .LBB91_21
+; RV64ZVE32F-NEXT: .LBB91_20: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -10929,23 +10911,22 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 13
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB91_23: # %else26
+; RV64ZVE32F-NEXT: .LBB91_21: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 4
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB91_25
-; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
+; RV64ZVE32F-NEXT: bgez a2, .LBB91_23
+; RV64ZVE32F-NEXT: # %bb.22: # %cond.store27
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 14
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
-; RV64ZVE32F-NEXT: .LBB91_25: # %else28
+; RV64ZVE32F-NEXT: .LBB91_23: # %else28
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB91_27
-; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
+; RV64ZVE32F-NEXT: beqz a1, .LBB91_25
+; RV64ZVE32F-NEXT: # %bb.24: # %cond.store29
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
@@ -10953,9 +10934,9 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 15
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB91_27: # %else30
+; RV64ZVE32F-NEXT: .LBB91_25: # %else30
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB91_26: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -10965,7 +10946,7 @@
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB91_8
-; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store7
+; RV64ZVE32F-NEXT: .LBB91_27: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -10975,7 +10956,7 @@
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB91_9
; RV64ZVE32F-NEXT: j .LBB91_10
-; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store11
+; RV64ZVE32F-NEXT: .LBB91_28: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
@@ -10983,7 +10964,7 @@
; RV64ZVE32F-NEXT: vse8.v v11, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB91_12
-; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store13
+; RV64ZVE32F-NEXT: .LBB91_29: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
@@ -10993,7 +10974,7 @@
; RV64ZVE32F-NEXT: vse8.v v10, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
-; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store15
+; RV64ZVE32F-NEXT: .LBB91_30: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11003,6 +10984,26 @@
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB91_14
; RV64ZVE32F-NEXT: j .LBB91_15
+; RV64ZVE32F-NEXT: .LBB91_31: # %cond.store21
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 11
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 51
+; RV64ZVE32F-NEXT: bgez a2, .LBB91_19
+; RV64ZVE32F-NEXT: .LBB91_32: # %cond.store23
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 12
+; RV64ZVE32F-NEXT: vse8.v v10, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 50
+; RV64ZVE32F-NEXT: bltz a2, .LBB91_20
+; RV64ZVE32F-NEXT: j .LBB91_21
 %ptrs = getelementptr inbounds i8, i8* %base, <16 x i8> %idxs
 call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> %val, <16 x i8*> %ptrs, i32 1, <16 x i1> %m)
 ret void
@@ -11075,10 +11076,10 @@
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_60
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_50
; RV64ZVE32F-NEXT: # %bb.7: # %else6
; RV64ZVE32F-NEXT: andi a2, a1, 16
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_61
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_51
; RV64ZVE32F-NEXT: .LBB92_8: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB92_10
@@ -11096,13 +11097,13 @@
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_62
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_52
; RV64ZVE32F-NEXT: # %bb.11: # %else12
; RV64ZVE32F-NEXT: andi a2, a1, 128
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_63
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_53
; RV64ZVE32F-NEXT: .LBB92_12: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 256
-; RV64ZVE32F-NEXT: bnez a2, .LBB92_64
+; RV64ZVE32F-NEXT: bnez a2, .LBB92_54
; RV64ZVE32F-NEXT: .LBB92_13: # %else16
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB92_15
@@ -11127,24 +11128,22 @@
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_17: # %else20
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 1
-; RV64ZVE32F-NEXT: addiw a3, a2, -2048
-; RV64ZVE32F-NEXT: and a3, a1, a3
+; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 4
-; RV64ZVE32F-NEXT: beqz a3, .LBB92_19
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.store21
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v13
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v13
+; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 11
-; RV64ZVE32F-NEXT: vse8.v v14, (a3)
+; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_19: # %else22
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_21
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11152,9 +11151,8 @@
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 12
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_21: # %else24
-; RV64ZVE32F-NEXT: lui a2, 2
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_23
+; RV64ZVE32F-NEXT: slli a2, a1, 50
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.store25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
@@ -11165,44 +11163,19 @@
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: .LBB92_23: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 4
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_25
-; RV64ZVE32F-NEXT: # %bb.24: # %cond.store27
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
-; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_25: # %else28
-; RV64ZVE32F-NEXT: lui a2, 8
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_27
-; RV64ZVE32F-NEXT: # %bb.26: # %cond.store29
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_27: # %else30
-; RV64ZVE32F-NEXT: lui a2, 16
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_29
-; RV64ZVE32F-NEXT: # %bb.28: # %cond.store31
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_29: # %else32
-; RV64ZVE32F-NEXT: lui a2, 32
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_31
-; RV64ZVE32F-NEXT: # %bb.30: # %cond.store33
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_55
+; RV64ZVE32F-NEXT: # %bb.24: # %else28
+; RV64ZVE32F-NEXT: slli a2, a1, 48
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_56
+; RV64ZVE32F-NEXT: .LBB92_25: # %else30
+; RV64ZVE32F-NEXT: slli a2, a1, 47
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_57
+; RV64ZVE32F-NEXT: .LBB92_26: # %else32
+; RV64ZVE32F-NEXT: slli a2, a1, 46
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_28
+; RV64ZVE32F-NEXT: .LBB92_27: # %cond.store33
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -11210,48 +11183,29 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 17
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_31: # %else34
+; RV64ZVE32F-NEXT: .LBB92_28: # %else34
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 64
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_33
-; RV64ZVE32F-NEXT: # %bb.32: # %cond.store35
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_30
+; RV64ZVE32F-NEXT: # %bb.29: # %cond.store35
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 18
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_33: # %else36
+; RV64ZVE32F-NEXT: .LBB92_30: # %else36
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 128
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_35
-; RV64ZVE32F-NEXT: # %bb.34: # %cond.store37
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v13
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
-; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_35: # %else38
-; RV64ZVE32F-NEXT: lui a2, 256
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_37
-; RV64ZVE32F-NEXT: # %bb.36: # %cond.store39
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a2, v12
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
-; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_37: # %else40
-; RV64ZVE32F-NEXT: lui a2, 512
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_39
-; RV64ZVE32F-NEXT: # %bb.38: # %cond.store41
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_58
+; RV64ZVE32F-NEXT: # %bb.31: # %else38
+; RV64ZVE32F-NEXT: slli a2, a1, 43
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_59
+; RV64ZVE32F-NEXT: .LBB92_32: # %else40
+; RV64ZVE32F-NEXT: slli a2, a1, 42
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_34
+; RV64ZVE32F-NEXT: .LBB92_33: # %cond.store41
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -11259,48 +11213,23 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 21
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
-; RV64ZVE32F-NEXT: .LBB92_39: # %else42
+; RV64ZVE32F-NEXT: .LBB92_34: # %else42
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 1024
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vslidedown.vi v11, v12, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_41
-; RV64ZVE32F-NEXT: # %bb.40: # %cond.store43
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_41: # %else44
-; RV64ZVE32F-NEXT: lui a2, 2048
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_43
-; RV64ZVE32F-NEXT: # %bb.42: # %cond.store45
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_43: # %else46
-; RV64ZVE32F-NEXT: lui a2, 4096
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_45
-; RV64ZVE32F-NEXT: # %bb.44: # %cond.store47
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_45: # %else48
-; RV64ZVE32F-NEXT: lui a2, 8192
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_47
-; RV64ZVE32F-NEXT: # %bb.46: # %cond.store49
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_60
+; RV64ZVE32F-NEXT: # %bb.35: # %else44
+; RV64ZVE32F-NEXT: slli a2, a1, 40
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_61
+; RV64ZVE32F-NEXT: .LBB92_36: # %else46
+; RV64ZVE32F-NEXT: slli a2, a1, 39
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_62
+; RV64ZVE32F-NEXT: .LBB92_37: # %else48
+; RV64ZVE32F-NEXT: slli a2, a1, 38
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_39
+; RV64ZVE32F-NEXT: .LBB92_38: # %cond.store49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -11308,48 +11237,29 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 25
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_47: # %else50
+; RV64ZVE32F-NEXT: .LBB92_39: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 16384
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_49
-; RV64ZVE32F-NEXT: # %bb.48: # %cond.store51
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_41
+; RV64ZVE32F-NEXT: # %bb.40: # %cond.store51
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 26
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_49: # %else52
+; RV64ZVE32F-NEXT: .LBB92_41: # %else52
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 32768
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 4
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_51
-; RV64ZVE32F-NEXT: # %bb.50: # %cond.store53
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v11
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_51: # %else54
-; RV64ZVE32F-NEXT: lui a2, 65536
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_53
-; RV64ZVE32F-NEXT: # %bb.52: # %cond.store55
-; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
-; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
-; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_53: # %else56
-; RV64ZVE32F-NEXT: lui a2, 131072
-; RV64ZVE32F-NEXT: and a2, a1, a2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_55
-; RV64ZVE32F-NEXT: # %bb.54: # %cond.store57
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_63
+; RV64ZVE32F-NEXT: # %bb.42: # %else54
+; RV64ZVE32F-NEXT: slli a2, a1, 35
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_64
+; RV64ZVE32F-NEXT: .LBB92_43: # %else56
+; RV64ZVE32F-NEXT: slli a2, a1, 34
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_45
+; RV64ZVE32F-NEXT: .LBB92_44: # %cond.store57
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
@@ -11357,23 +11267,22 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 29
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_55: # %else58
+; RV64ZVE32F-NEXT: .LBB92_45: # %else58
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
-; RV64ZVE32F-NEXT: lui a2, 262144
-; RV64ZVE32F-NEXT: and a2, a1, a2
+; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: beqz a2, .LBB92_57
-; RV64ZVE32F-NEXT: # %bb.56: # %cond.store59
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_47
+; RV64ZVE32F-NEXT: # %bb.46: # %cond.store59
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 30
; RV64ZVE32F-NEXT: vse8.v v12, (a2)
-; RV64ZVE32F-NEXT: .LBB92_57: # %else60
+; RV64ZVE32F-NEXT: .LBB92_47: # %else60
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
-; RV64ZVE32F-NEXT: beqz a1, .LBB92_59
-; RV64ZVE32F-NEXT: # %bb.58: # %cond.store61
+; RV64ZVE32F-NEXT: beqz a1, .LBB92_49
+; RV64ZVE32F-NEXT: # %bb.48: # %cond.store61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v10
@@ -11381,9 +11290,9 @@
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 31
; RV64ZVE32F-NEXT: vse8.v v8, (a0)
-; RV64ZVE32F-NEXT: .LBB92_59: # %else62
+; RV64ZVE32F-NEXT: .LBB92_49: # %else62
; RV64ZVE32F-NEXT: ret
-; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store5
+; RV64ZVE32F-NEXT: .LBB92_50: # %cond.store5
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
@@ -11393,7 +11302,7 @@
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB92_8
-; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store7
+; RV64ZVE32F-NEXT: .LBB92_51: # %cond.store7
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11403,7 +11312,7 @@
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB92_9
; RV64ZVE32F-NEXT: j .LBB92_10
-; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store11
+; RV64ZVE32F-NEXT: .LBB92_52: # %cond.store11
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
@@ -11411,7 +11320,7 @@
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB92_12
-; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store13
+; RV64ZVE32F-NEXT: .LBB92_53: # %cond.store13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
@@ -11421,7 +11330,7 @@
; RV64ZVE32F-NEXT: vse8.v v14, (a2)
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB92_13
-; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store15
+; RV64ZVE32F-NEXT: .LBB92_54: # %cond.store15
; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
@@ -11431,6 +11340,102 @@
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB92_14
; RV64ZVE32F-NEXT: j .LBB92_15
+; RV64ZVE32F-NEXT: .LBB92_55: # %cond.store27
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 14
+; RV64ZVE32F-NEXT: vse8.v v14, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 48
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_25
+; RV64ZVE32F-NEXT: .LBB92_56: # %cond.store29
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 15
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 47
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_26
+; RV64ZVE32F-NEXT: .LBB92_57: # %cond.store31
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 16
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 46
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_27
+; RV64ZVE32F-NEXT: j .LBB92_28
+; RV64ZVE32F-NEXT: .LBB92_58: # %cond.store37
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v13
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 19
+; RV64ZVE32F-NEXT: vse8.v v14, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 43
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_32
+; RV64ZVE32F-NEXT: .LBB92_59: # %cond.store39
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v12
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 20
+; RV64ZVE32F-NEXT: vse8.v v14, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 42
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_33
+; RV64ZVE32F-NEXT: j .LBB92_34
+; RV64ZVE32F-NEXT: .LBB92_60: # %cond.store43
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 22
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 40
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_36
+; RV64ZVE32F-NEXT: .LBB92_61: # %cond.store45
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 23
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 39
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_37
+; RV64ZVE32F-NEXT: .LBB92_62: # %cond.store47
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 24
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 38
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_38
+; RV64ZVE32F-NEXT: j .LBB92_39
+; RV64ZVE32F-NEXT: .LBB92_63: # %cond.store53
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 27
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 35
+; RV64ZVE32F-NEXT: bgez a2, .LBB92_43
+; RV64ZVE32F-NEXT: .LBB92_64: # %cond.store55
+; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, mu
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m2, ta, mu
+; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 28
+; RV64ZVE32F-NEXT: vse8.v v12, (a2)
+; RV64ZVE32F-NEXT: slli a2, a1, 34
+; RV64ZVE32F-NEXT: bltz a2, .LBB92_44
+; RV64ZVE32F-NEXT: j .LBB92_45
 %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
 call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> %val, <32 x i8*> %ptrs, i32 1, <32 x i1> %m)
 ret void