diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9130,10 +9130,23 @@
   if (SDValue V = combineSubOfBoolean(N, DAG))
     return V;
 
-  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
-  //      (select lhs, rhs, cc, x, (sub x, y))
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
+  // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
+  if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
+      isNullConstant(N1.getOperand(1))) {
+    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+    if (CCVal == ISD::SETLT) {
+      EVT VT = N->getValueType(0);
+      SDLoc DL(N);
+      unsigned ShAmt = N0.getValueSizeInBits() - 1;
+      return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
+                         DAG.getConstant(ShAmt, DL, VT));
+    }
+  }
+
+  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
+  //      (select lhs, rhs, cc, x, (sub x, y))
   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
 }
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -219,8 +219,7 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB11_3:
-; RV32I-NEXT: slti a0, a4, 0
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: srai a0, a4, 31
 ; RV32I-NEXT: and a0, a0, a3
 ; RV32I-NEXT: ret
 %1 = shl i64 %a, %b
@@ -307,8 +306,7 @@
 ; RV32I-NEXT: sll a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: .LBB15_3:
-; RV32I-NEXT: slti a1, a4, 0
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: srai a1, a4, 31
 ; RV32I-NEXT: and a1, a1, a3
 ; RV32I-NEXT: ret
 %1 = lshr i64 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -367,7 +367,7 @@
 ; RV32-NEXT: addi a1, a1, 722
 ; RV32-NEXT: srl a1, a1, a0
 ; RV32-NEXT: addi a0, a0, -32
-; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: srli a0, a0, 31
 ; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 ;
@@ -407,7 +407,7 @@
 ; RV32-NEXT: addi a1, a1, 722
 ; RV32-NEXT: srl a1, a1, a0
 ; RV32-NEXT: addi a0, a0, -32
-; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: srli a0, a0, 31
 ; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1969,8 +1969,7 @@
 ; RV32IF-NEXT: .LBB27_3: # %entry
 ; RV32IF-NEXT: mv a0, a3
 ; RV32IF-NEXT: .LBB27_4: # %entry
-; RV32IF-NEXT: slti a3, a1, 0
-; RV32IF-NEXT: neg a3, a3
+; RV32IF-NEXT: srai a3, a1, 31
 ; RV32IF-NEXT: and a1, a3, a1
 ; RV32IF-NEXT: mv a3, a0
 ; RV32IF-NEXT: bltz a1, .LBB27_11
@@ -2302,8 +2301,7 @@
 ; RV32-NEXT: .LBB33_3: # %entry
 ; RV32-NEXT: mv a0, a3
 ; RV32-NEXT: .LBB33_4: # %entry
-; RV32-NEXT: slti a3, a1, 0
-; RV32-NEXT: neg a3, a3
+; RV32-NEXT: srai a3, a1, 31
 ; RV32-NEXT: and a1, a3, a1
 ; RV32-NEXT: mv a3, a0
 ; RV32-NEXT: bltz a1, .LBB33_11
@@ -3072,8 +3070,7 @@
 ; RV64IF-NEXT: .LBB45_3: # %entry
 ; RV64IF-NEXT: mv a0, a3
 ; RV64IF-NEXT: .LBB45_4: # %entry
-; RV64IF-NEXT: slti a3, a1, 0
-; RV64IF-NEXT: neg a3, a3
+; RV64IF-NEXT: srai a3, a1, 63
 ; RV64IF-NEXT: and a1, a3, a1
 ; RV64IF-NEXT: slli a4, a2, 63
 ; RV64IF-NEXT: mv a3, a0
@@ -3845,8 +3842,7 @@
 ; RV64-NEXT: .LBB51_3: # %entry
 ; RV64-NEXT: mv a0, a3
 ; RV64-NEXT: .LBB51_4: # %entry
-; RV64-NEXT: slti a3, a1, 0
-; RV64-NEXT: neg a3, a3
+; RV64-NEXT: srai a3, a1, 63
 ; RV64-NEXT: and a1, a3, a1
 ; RV64-NEXT: slli a4, a2, 63
 ; RV64-NEXT: mv a3, a0
diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
--- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll
+++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll
@@ -119,11 +119,9 @@
 define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: rotl_64:
 ; RV32I: # %bb.0:
+; RV32I-NEXT: addi a5, a2, -32
 ; RV32I-NEXT: sll a4, a0, a2
-; RV32I-NEXT: addi a3, a2, -32
-; RV32I-NEXT: slti a5, a3, 0
-; RV32I-NEXT: neg a5, a5
-; RV32I-NEXT: bltz a3, .LBB2_2
+; RV32I-NEXT: bltz a5, .LBB2_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a4
 ; RV32I-NEXT: j .LBB2_3
@@ -134,14 +132,15 @@
 ; RV32I-NEXT: srl a6, a7, a6
 ; RV32I-NEXT: or a3, a3, a6
 ; RV32I-NEXT: .LBB2_3:
+; RV32I-NEXT: srai a5, a5, 31
 ; RV32I-NEXT: and a4, a5, a4
 ; RV32I-NEXT: neg a7, a2
 ; RV32I-NEXT: li a5, 32
-; RV32I-NEXT: sub a6, a5, a2
-; RV32I-NEXT: srl a5, a1, a7
-; RV32I-NEXT: bltz a6, .LBB2_5
+; RV32I-NEXT: sub a5, a5, a2
+; RV32I-NEXT: srl a6, a1, a7
+; RV32I-NEXT: bltz a5, .LBB2_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a0, a5
+; RV32I-NEXT: mv a0, a6
 ; RV32I-NEXT: j .LBB2_6
 ; RV32I-NEXT: .LBB2_5:
 ; RV32I-NEXT: srl a0, a0, a7
@@ -152,9 +151,8 @@
 ; RV32I-NEXT: sll a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: .LBB2_6:
-; RV32I-NEXT: slti a1, a6, 0
-; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: and a1, a1, a5
+; RV32I-NEXT: srai a5, a5, 31
+; RV32I-NEXT: and a1, a5, a6
 ; RV32I-NEXT: or a1, a3, a1
 ; RV32I-NEXT: or a0, a4, a0
 ; RV32I-NEXT: ret
@@ -169,11 +167,9 @@
 ;
 ; RV32ZBB-LABEL: rotl_64:
 ; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: addi a5, a2, -32
 ; RV32ZBB-NEXT: sll a4, a0, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a5, a3, 0
-; RV32ZBB-NEXT: neg a5, a5
-; RV32ZBB-NEXT: bltz a3, .LBB2_2
+; RV32ZBB-NEXT: bltz a5, .LBB2_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a4
 ; RV32ZBB-NEXT: j .LBB2_3
@@ -184,14 +180,15 @@
 ; RV32ZBB-NEXT: srl a6, a7, a6
 ; RV32ZBB-NEXT: or a3, a3, a6
 ; RV32ZBB-NEXT: .LBB2_3:
+; RV32ZBB-NEXT: srai a5, a5, 31
 ; RV32ZBB-NEXT: and a4, a5, a4
 ; RV32ZBB-NEXT: neg a7, a2
 ; RV32ZBB-NEXT: li a5, 32
-; RV32ZBB-NEXT: sub a6, a5, a2
-; RV32ZBB-NEXT: srl a5, a1, a7
-; RV32ZBB-NEXT: bltz a6, .LBB2_5
+; RV32ZBB-NEXT: sub a5, a5, a2
+; RV32ZBB-NEXT: srl a6, a1, a7
+; RV32ZBB-NEXT: bltz a5, .LBB2_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: mv a0, a5
+; RV32ZBB-NEXT: mv a0, a6
 ; RV32ZBB-NEXT: j .LBB2_6
 ; RV32ZBB-NEXT: .LBB2_5:
 ; RV32ZBB-NEXT: srl a0, a0, a7
@@ -202,9 +199,8 @@
 ; RV32ZBB-NEXT: sll a1, a1, a2
 ; RV32ZBB-NEXT: or a0, a0, a1
 ; RV32ZBB-NEXT: .LBB2_6:
-; RV32ZBB-NEXT: slti a1, a6, 0
-; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: and a1, a1, a5
+; RV32ZBB-NEXT: srai a5, a5, 31
+; RV32ZBB-NEXT: and a1, a5, a6
 ; RV32ZBB-NEXT: or a1, a3, a1
 ; RV32ZBB-NEXT: or a0, a4, a0
 ; RV32ZBB-NEXT: ret
@@ -216,11 +212,9 @@
 ;
 ; RV32XTHEADBB-LABEL: rotl_64:
 ; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a5, a2, -32
 ; RV32XTHEADBB-NEXT: sll a4, a0, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a5, a3, 0
-; RV32XTHEADBB-NEXT: neg a5, a5
-; RV32XTHEADBB-NEXT: bltz a3, .LBB2_2
+; RV32XTHEADBB-NEXT: bltz a5, .LBB2_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a4
 ; RV32XTHEADBB-NEXT: j .LBB2_3
@@ -231,14 +225,15 @@
 ; RV32XTHEADBB-NEXT: srl a6, a7, a6
 ; RV32XTHEADBB-NEXT: or a3, a3, a6
 ; RV32XTHEADBB-NEXT: .LBB2_3:
+; RV32XTHEADBB-NEXT: srai a5, a5, 31
 ; RV32XTHEADBB-NEXT: and a4, a5, a4
 ; RV32XTHEADBB-NEXT: neg a7, a2
 ; RV32XTHEADBB-NEXT: li a5, 32
-; RV32XTHEADBB-NEXT: sub a6, a5, a2
-; RV32XTHEADBB-NEXT: srl a5, a1, a7
-; RV32XTHEADBB-NEXT: bltz a6, .LBB2_5
+; RV32XTHEADBB-NEXT: sub a5, a5, a2
+; RV32XTHEADBB-NEXT: srl a6, a1, a7
+; RV32XTHEADBB-NEXT: bltz a5, .LBB2_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: mv a0, a5
+; RV32XTHEADBB-NEXT: mv a0, a6
 ; RV32XTHEADBB-NEXT: j .LBB2_6
 ; RV32XTHEADBB-NEXT: .LBB2_5:
 ; RV32XTHEADBB-NEXT: srl a0, a0, a7
@@ -249,9 +244,8 @@
 ; RV32XTHEADBB-NEXT: sll a1, a1, a2
 ; RV32XTHEADBB-NEXT: or a0, a0, a1
 ; RV32XTHEADBB-NEXT: .LBB2_6:
-; RV32XTHEADBB-NEXT: slti a1, a6, 0
-; RV32XTHEADBB-NEXT: neg a1, a1
-; RV32XTHEADBB-NEXT: and a1, a1, a5
+; RV32XTHEADBB-NEXT: srai a5, a5, 31
+; RV32XTHEADBB-NEXT: and a1, a5, a6
 ; RV32XTHEADBB-NEXT: or a1, a3, a1
 ; RV32XTHEADBB-NEXT: or a0, a4, a0
 ; RV32XTHEADBB-NEXT: ret
@@ -273,11 +267,9 @@
 define i64 @rotr_64(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: rotr_64:
 ; RV32I: # %bb.0:
+; RV32I-NEXT: addi a5, a2, -32
 ; RV32I-NEXT: srl a4, a1, a2
-; RV32I-NEXT: addi a3, a2, -32
-; RV32I-NEXT: slti a5, a3, 0
-; RV32I-NEXT: neg a5, a5
-; RV32I-NEXT: bltz a3, .LBB3_2
+; RV32I-NEXT: bltz a5, .LBB3_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a4
 ; RV32I-NEXT: j .LBB3_3
@@ -288,14 +280,15 @@
 ; RV32I-NEXT: sll a6, a7, a6
 ; RV32I-NEXT: or a3, a3, a6
 ; RV32I-NEXT: .LBB3_3:
+; RV32I-NEXT: srai a5, a5, 31
 ; RV32I-NEXT: and a4, a5, a4
 ; RV32I-NEXT: neg a7, a2
 ; RV32I-NEXT: li a5, 32
-; RV32I-NEXT: sub a6, a5, a2
-; RV32I-NEXT: sll a5, a0, a7
-; RV32I-NEXT: bltz a6, .LBB3_5
+; RV32I-NEXT: sub a5, a5, a2
+; RV32I-NEXT: sll a6, a0, a7
+; RV32I-NEXT: bltz a5, .LBB3_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a1, a5
+; RV32I-NEXT: mv a1, a6
 ; RV32I-NEXT: j .LBB3_6
 ; RV32I-NEXT: .LBB3_5:
 ; RV32I-NEXT: sll a1, a1, a7
@@ -306,9 +299,8 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB3_6:
-; RV32I-NEXT: slti a0, a6, 0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, a0, a5
+; RV32I-NEXT: srai a5, a5, 31
+; RV32I-NEXT: and a0, a5, a6
 ; RV32I-NEXT: or a0, a3, a0
 ; RV32I-NEXT: or a1, a4, a1
 ; RV32I-NEXT: ret
@@ -323,11 +315,9 @@
 ;
 ; RV32ZBB-LABEL: rotr_64:
 ; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: addi a5, a2, -32
 ; RV32ZBB-NEXT: srl a4, a1, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a5, a3, 0
-; RV32ZBB-NEXT: neg a5, a5
-; RV32ZBB-NEXT: bltz a3, .LBB3_2
+; RV32ZBB-NEXT: bltz a5, .LBB3_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a4
 ; RV32ZBB-NEXT: j .LBB3_3
@@ -338,14 +328,15 @@
 ; RV32ZBB-NEXT: sll a6, a7, a6
 ; RV32ZBB-NEXT: or a3, a3, a6
 ; RV32ZBB-NEXT: .LBB3_3:
+; RV32ZBB-NEXT: srai a5, a5, 31
 ; RV32ZBB-NEXT: and a4, a5, a4
 ; RV32ZBB-NEXT: neg a7, a2
 ; RV32ZBB-NEXT: li a5, 32
-; RV32ZBB-NEXT: sub a6, a5, a2
-; RV32ZBB-NEXT: sll a5, a0, a7
-; RV32ZBB-NEXT: bltz a6, .LBB3_5
+; RV32ZBB-NEXT: sub a5, a5, a2
+; RV32ZBB-NEXT: sll a6, a0, a7
+; RV32ZBB-NEXT: bltz a5, .LBB3_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: mv a1, a5
+; RV32ZBB-NEXT: mv a1, a6
 ; RV32ZBB-NEXT: j .LBB3_6
 ; RV32ZBB-NEXT: .LBB3_5:
 ; RV32ZBB-NEXT: sll a1, a1, a7
@@ -356,9 +347,8 @@
 ; RV32ZBB-NEXT: srl a0, a0, a2
 ; RV32ZBB-NEXT: or a1, a1, a0
 ; RV32ZBB-NEXT: .LBB3_6:
-; RV32ZBB-NEXT: slti a0, a6, 0
-; RV32ZBB-NEXT: neg a0, a0
-; RV32ZBB-NEXT: and a0, a0, a5
+; RV32ZBB-NEXT: srai a5, a5, 31
+; RV32ZBB-NEXT: and a0, a5, a6
 ; RV32ZBB-NEXT: or a0, a3, a0
 ; RV32ZBB-NEXT: or a1, a4, a1
 ; RV32ZBB-NEXT: ret
@@ -370,11 +360,9 @@
 ;
 ; RV32XTHEADBB-LABEL: rotr_64:
 ; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a5, a2, -32
 ; RV32XTHEADBB-NEXT: srl a4, a1, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a5, a3, 0
-; RV32XTHEADBB-NEXT: neg a5, a5
-; RV32XTHEADBB-NEXT: bltz a3, .LBB3_2
+; RV32XTHEADBB-NEXT: bltz a5, .LBB3_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a4
 ; RV32XTHEADBB-NEXT: j .LBB3_3
@@ -385,14 +373,15 @@
 ; RV32XTHEADBB-NEXT: sll a6, a7, a6
 ; RV32XTHEADBB-NEXT: or a3, a3, a6
 ; RV32XTHEADBB-NEXT: .LBB3_3:
+; RV32XTHEADBB-NEXT: srai a5, a5, 31
 ; RV32XTHEADBB-NEXT: and a4, a5, a4
 ; RV32XTHEADBB-NEXT: neg a7, a2
 ; RV32XTHEADBB-NEXT: li a5, 32
-; RV32XTHEADBB-NEXT: sub a6, a5, a2
-; RV32XTHEADBB-NEXT: sll a5, a0, a7
-; RV32XTHEADBB-NEXT: bltz a6, .LBB3_5
+; RV32XTHEADBB-NEXT: sub a5, a5, a2
+; RV32XTHEADBB-NEXT: sll a6, a0, a7
+; RV32XTHEADBB-NEXT: bltz a5, .LBB3_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: mv a1, a5
+; RV32XTHEADBB-NEXT: mv a1, a6
 ; RV32XTHEADBB-NEXT: j .LBB3_6
 ; RV32XTHEADBB-NEXT: .LBB3_5:
 ; RV32XTHEADBB-NEXT: sll a1, a1, a7
@@ -403,9 +392,8 @@
 ; RV32XTHEADBB-NEXT: srl a0, a0, a2
 ; RV32XTHEADBB-NEXT: or a1, a1, a0
 ; RV32XTHEADBB-NEXT: .LBB3_6:
-; RV32XTHEADBB-NEXT: slti a0, a6, 0
-; RV32XTHEADBB-NEXT: neg a0, a0
-; RV32XTHEADBB-NEXT: and a0, a0, a5
+; RV32XTHEADBB-NEXT: srai a5, a5, 31
+; RV32XTHEADBB-NEXT: and a0, a5, a6
 ; RV32XTHEADBB-NEXT: or a0, a3, a0
 ; RV32XTHEADBB-NEXT: or a1, a4, a1
 ; RV32XTHEADBB-NEXT: ret
@@ -748,29 +736,27 @@
 ; RV32I-NEXT: srl a6, a7, a6
 ; RV32I-NEXT: or a3, a3, a6
 ; RV32I-NEXT: .LBB10_3:
-; RV32I-NEXT: slti a5, a5, 0
-; RV32I-NEXT: neg a5, a5
-; RV32I-NEXT: and a4, a5, a4
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: srl a2, a1, a6
-; RV32I-NEXT: andi a5, a6, 63
-; RV32I-NEXT: addi a7, a5, -32
-; RV32I-NEXT: slti t0, a7, 0
-; RV32I-NEXT: neg t0, t0
-; RV32I-NEXT: and a2, t0, a2
-; RV32I-NEXT: bltz a7, .LBB10_5
+; RV32I-NEXT: srai t0, a5, 31
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: andi a7, a5, 63
+; RV32I-NEXT: addi a6, a7, -32
+; RV32I-NEXT: and a2, t0, a4
+; RV32I-NEXT: bltz a6, .LBB10_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a0, a1, a5
+; RV32I-NEXT: srl a0, a1, a7
 ; RV32I-NEXT: j .LBB10_6
 ; RV32I-NEXT: .LBB10_5:
-; RV32I-NEXT: srl a0, a0, a6
-; RV32I-NEXT: not a5, a5
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: sll a1, a1, a5
-; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srl a0, a0, a5
+; RV32I-NEXT: not a4, a7
+; RV32I-NEXT: slli a7, a1, 1
+; RV32I-NEXT: sll a4, a7, a4
+; RV32I-NEXT: or a0, a0, a4
 ; RV32I-NEXT: .LBB10_6:
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: or a1, a3, a2
+; RV32I-NEXT: srl a1, a1, a5
+; RV32I-NEXT: srai a4, a6, 31
+; RV32I-NEXT: and a1, a4, a1
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: or a0, a2, a0
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: rotl_64_mask:
@@ -796,29 +782,27 @@
 ; RV32ZBB-NEXT: srl a6, a7, a6
 ; RV32ZBB-NEXT: or a3, a3, a6
 ; RV32ZBB-NEXT: .LBB10_3:
-; RV32ZBB-NEXT: slti a5, a5, 0
-; RV32ZBB-NEXT: neg a5, a5
-; RV32ZBB-NEXT: and a4, a5, a4
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: srl a2, a1, a6
-; RV32ZBB-NEXT: andi a5, a6, 63
-; RV32ZBB-NEXT: addi a7, a5, -32
-; RV32ZBB-NEXT: slti t0, a7, 0
-; RV32ZBB-NEXT: neg t0, t0
-; RV32ZBB-NEXT: and a2, t0, a2
-; RV32ZBB-NEXT: bltz a7, .LBB10_5
+; RV32ZBB-NEXT: srai t0, a5, 31
+; RV32ZBB-NEXT: neg a5, a2
+; RV32ZBB-NEXT: andi a7, a5, 63
+; RV32ZBB-NEXT: addi a6, a7, -32
+; RV32ZBB-NEXT: and a2, t0, a4
+; RV32ZBB-NEXT: bltz a6, .LBB10_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: srl a0, a1, a5
+; RV32ZBB-NEXT: srl a0, a1, a7
 ; RV32ZBB-NEXT: j .LBB10_6
 ; RV32ZBB-NEXT: .LBB10_5:
-; RV32ZBB-NEXT: srl a0, a0, a6
-; RV32ZBB-NEXT: not a5, a5
-; RV32ZBB-NEXT: slli a1, a1, 1
-; RV32ZBB-NEXT: sll a1, a1, a5
-; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: srl a0, a0, a5
+; RV32ZBB-NEXT: not a4, a7
+; RV32ZBB-NEXT: slli a7, a1, 1
+; RV32ZBB-NEXT: sll a4, a7, a4
+; RV32ZBB-NEXT: or a0, a0, a4
 ; RV32ZBB-NEXT: .LBB10_6:
-; RV32ZBB-NEXT: or a0, a4, a0
-; RV32ZBB-NEXT: or a1, a3, a2
+; RV32ZBB-NEXT: srl a1, a1, a5
+; RV32ZBB-NEXT: srai a4, a6, 31
+; RV32ZBB-NEXT: and a1, a4, a1
+; RV32ZBB-NEXT: or a1, a3, a1
+; RV32ZBB-NEXT: or a0, a2, a0
 ; RV32ZBB-NEXT: ret
 ;
 ; RV64ZBB-LABEL: rotl_64_mask:
@@ -841,29 +825,27 @@
 ; RV32XTHEADBB-NEXT: srl a6, a7, a6
 ; RV32XTHEADBB-NEXT: or a3, a3, a6
 ; RV32XTHEADBB-NEXT: .LBB10_3:
-; RV32XTHEADBB-NEXT: slti a5, a5, 0
-; RV32XTHEADBB-NEXT: neg a5, a5
-; RV32XTHEADBB-NEXT: and a4, a5, a4
-; RV32XTHEADBB-NEXT: neg a6, a2
-; RV32XTHEADBB-NEXT: srl a2, a1, a6
-; RV32XTHEADBB-NEXT: andi a5, a6, 63
-; RV32XTHEADBB-NEXT: addi a7, a5, -32
-; RV32XTHEADBB-NEXT: slti t0, a7, 0
-; RV32XTHEADBB-NEXT: neg t0, t0
-; RV32XTHEADBB-NEXT: and a2, t0, a2
-; RV32XTHEADBB-NEXT: bltz a7, .LBB10_5
+; RV32XTHEADBB-NEXT: srai t0, a5, 31
+; RV32XTHEADBB-NEXT: neg a5, a2
+; RV32XTHEADBB-NEXT: andi a7, a5, 63
+; RV32XTHEADBB-NEXT: addi a6, a7, -32
+; RV32XTHEADBB-NEXT: and a2, t0, a4
+; RV32XTHEADBB-NEXT: bltz a6, .LBB10_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: srl a0, a1, a5
+; RV32XTHEADBB-NEXT: srl a0, a1, a7
 ; RV32XTHEADBB-NEXT: j .LBB10_6
 ; RV32XTHEADBB-NEXT: .LBB10_5:
-; RV32XTHEADBB-NEXT: srl a0, a0, a6
-; RV32XTHEADBB-NEXT: not a5, a5
-; RV32XTHEADBB-NEXT: slli a1, a1, 1
-; RV32XTHEADBB-NEXT: sll a1, a1, a5
-; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a5
+; RV32XTHEADBB-NEXT: not a4, a7
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: sll a4, a7, a4
+; RV32XTHEADBB-NEXT: or a0, a0, a4
 ; RV32XTHEADBB-NEXT: .LBB10_6:
-; RV32XTHEADBB-NEXT: or a0, a4, a0
-; RV32XTHEADBB-NEXT: or a1, a3, a2
+; RV32XTHEADBB-NEXT: srl a1, a1, a5
+; RV32XTHEADBB-NEXT: srai a4, a6, 31
+; RV32XTHEADBB-NEXT: and a1, a4, a1
+; RV32XTHEADBB-NEXT: or a1, a3, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
 ; RV32XTHEADBB-NEXT: ret
 ;
 ; RV64XTHEADBB-LABEL: rotl_64_mask:
@@ -897,30 +879,28 @@
 ; RV32I-NEXT: srl a3, a6, a3
 ; RV32I-NEXT: or a3, a5, a3
 ; RV32I-NEXT: .LBB11_3:
-; RV32I-NEXT: sll a5, a0, a2
-; RV32I-NEXT: slti a4, a4, 0
-; RV32I-NEXT: neg a4, a4
-; RV32I-NEXT: and a4, a4, a5
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: srl a2, a1, a6
-; RV32I-NEXT: andi a5, a6, 63
-; RV32I-NEXT: addi a7, a5, -32
-; RV32I-NEXT: slti t0, a7, 0
-; RV32I-NEXT: neg t0, t0
-; RV32I-NEXT: and a2, t0, a2
-; RV32I-NEXT: bltz a7, .LBB11_5
+; RV32I-NEXT: sll a7, a0, a2
+; RV32I-NEXT: srai t0, a4, 31
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: andi a6, a4, 63
+; RV32I-NEXT: addi a5, a6, -32
+; RV32I-NEXT: and a2, t0, a7
+; RV32I-NEXT: bltz a5, .LBB11_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: srl a0, a1, a5
+; RV32I-NEXT: srl a0, a1, a6
 ; RV32I-NEXT: j .LBB11_6
 ; RV32I-NEXT: .LBB11_5:
-; RV32I-NEXT: srl a0, a0, a6
-; RV32I-NEXT: not a5, a5
-; RV32I-NEXT: slli a1, a1, 1
-; RV32I-NEXT: sll a1, a1, a5
-; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srl a0, a0, a4
+; RV32I-NEXT: not a6, a6
+; RV32I-NEXT: slli a7, a1, 1
+; RV32I-NEXT: sll a6, a7, a6
+; RV32I-NEXT: or a0, a0, a6
 ; RV32I-NEXT: .LBB11_6:
-; RV32I-NEXT: or a0, a4, a0
-; RV32I-NEXT: or a1, a3, a2
+; RV32I-NEXT: srl a1, a1, a4
+; RV32I-NEXT: srai a5, a5, 31
+; RV32I-NEXT: and a1, a5, a1
+; RV32I-NEXT: or a1, a3, a1
+; RV32I-NEXT: or a0, a2, a0
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: rotl_64_mask_and_127_and_63:
@@ -946,30 +926,28 @@
 ; RV32ZBB-NEXT: srl a3, a6, a3
 ; RV32ZBB-NEXT: or a3, a5, a3
 ; RV32ZBB-NEXT: .LBB11_3:
-; RV32ZBB-NEXT: sll a5, a0, a2
-; RV32ZBB-NEXT: slti a4, a4, 0
-; RV32ZBB-NEXT: neg a4, a4
-; RV32ZBB-NEXT: and a4, a4, a5
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: srl a2, a1, a6
-; RV32ZBB-NEXT: andi a5, a6, 63
-; RV32ZBB-NEXT: addi a7, a5, -32
-; RV32ZBB-NEXT: slti t0, a7, 0
-; RV32ZBB-NEXT: neg t0, t0
-; RV32ZBB-NEXT: and a2, t0, a2
-; RV32ZBB-NEXT: bltz a7, .LBB11_5
+; RV32ZBB-NEXT: sll a7, a0, a2
+; RV32ZBB-NEXT: srai t0, a4, 31
+; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: andi a6, a4, 63
+; RV32ZBB-NEXT: addi a5, a6, -32
+; RV32ZBB-NEXT: and a2, t0, a7
+; RV32ZBB-NEXT: bltz a5, .LBB11_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: srl a0, a1, a5
+; RV32ZBB-NEXT: srl a0, a1, a6
 ; RV32ZBB-NEXT: j .LBB11_6
 ; RV32ZBB-NEXT: .LBB11_5:
-; RV32ZBB-NEXT: srl a0, a0, a6
-; RV32ZBB-NEXT: not a5, a5
-; RV32ZBB-NEXT: slli a1, a1, 1
-; RV32ZBB-NEXT: sll a1, a1, a5
-; RV32ZBB-NEXT: or a0, a0, a1
+; RV32ZBB-NEXT: srl a0, a0, a4
+; RV32ZBB-NEXT: not a6, a6
+; RV32ZBB-NEXT: slli a7, a1, 1
+; RV32ZBB-NEXT: sll a6, a7, a6
+; RV32ZBB-NEXT: or a0, a0, a6
 ; RV32ZBB-NEXT: .LBB11_6:
-; RV32ZBB-NEXT: or a0, a4, a0
-; RV32ZBB-NEXT: or a1, a3, a2
+; RV32ZBB-NEXT: srl a1, a1, a4
+; RV32ZBB-NEXT: srai a5, a5, 31
+; RV32ZBB-NEXT: and a1, a5, a1
+; RV32ZBB-NEXT: or a1, a3, a1
+; RV32ZBB-NEXT: or a0, a2, a0
 ; RV32ZBB-NEXT: ret
 ;
 ; RV64ZBB-LABEL: rotl_64_mask_and_127_and_63:
@@ -992,30 +970,28 @@
 ; RV32XTHEADBB-NEXT: srl a3, a6, a3
 ; RV32XTHEADBB-NEXT: or a3, a5, a3
 ; RV32XTHEADBB-NEXT: .LBB11_3:
-; RV32XTHEADBB-NEXT: sll a5, a0, a2
-; RV32XTHEADBB-NEXT: slti a4, a4, 0
-; RV32XTHEADBB-NEXT: neg a4, a4
-; RV32XTHEADBB-NEXT: and a4, a4, a5
-; RV32XTHEADBB-NEXT: neg a6, a2
-; RV32XTHEADBB-NEXT: srl a2, a1, a6
-; RV32XTHEADBB-NEXT: andi a5, a6, 63
-; RV32XTHEADBB-NEXT: addi a7, a5, -32
-; RV32XTHEADBB-NEXT: slti t0, a7, 0
-; RV32XTHEADBB-NEXT: neg t0, t0
-; RV32XTHEADBB-NEXT: and a2, t0, a2
-; RV32XTHEADBB-NEXT: bltz a7, .LBB11_5
+; RV32XTHEADBB-NEXT: sll a7, a0, a2
+; RV32XTHEADBB-NEXT: srai t0, a4, 31
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: andi a6, a4, 63
+; RV32XTHEADBB-NEXT: addi a5, a6, -32
+; RV32XTHEADBB-NEXT: and a2, t0, a7
+; RV32XTHEADBB-NEXT: bltz a5, .LBB11_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: srl a0, a1, a5
+; RV32XTHEADBB-NEXT: srl a0, a1, a6
 ; RV32XTHEADBB-NEXT: j .LBB11_6
 ; RV32XTHEADBB-NEXT: .LBB11_5:
-; RV32XTHEADBB-NEXT: srl a0, a0, a6
-; RV32XTHEADBB-NEXT: not a5, a5
-; RV32XTHEADBB-NEXT: slli a1, a1, 1
-; RV32XTHEADBB-NEXT: sll a1, a1, a5
-; RV32XTHEADBB-NEXT: or a0, a0, a1
+; RV32XTHEADBB-NEXT: srl a0, a0, a4
+; RV32XTHEADBB-NEXT: not a6, a6
+; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: sll a6, a7, a6
+; RV32XTHEADBB-NEXT: or a0, a0, a6
 ; RV32XTHEADBB-NEXT: .LBB11_6:
-; RV32XTHEADBB-NEXT: or a0, a4, a0
-; RV32XTHEADBB-NEXT: or a1, a3, a2
+; RV32XTHEADBB-NEXT: srl a1, a1, a4
+; RV32XTHEADBB-NEXT: srai a5, a5, 31
+; RV32XTHEADBB-NEXT: and a1, a5, a1
+; RV32XTHEADBB-NEXT: or a1, a3, a1
+; RV32XTHEADBB-NEXT: or a0, a2, a0
 ; RV32XTHEADBB-NEXT: ret
 ;
 ; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63:
@@ -1093,11 +1069,9 @@
 define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: rotr_64_mask:
 ; RV32I: # %bb.0:
+; RV32I-NEXT: addi a5, a2, -32
 ; RV32I-NEXT: srl a4, a1, a2
-; RV32I-NEXT: addi a3, a2, -32
-; RV32I-NEXT: slti a5, a3, 0
-; RV32I-NEXT: neg a5, a5
-; RV32I-NEXT: bltz a3, .LBB13_2
+; RV32I-NEXT: bltz a5, .LBB13_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a4
 ; RV32I-NEXT: j .LBB13_3
@@ -1108,24 +1082,24 @@
 ; RV32I-NEXT: sll a6, a7, a6
 ; RV32I-NEXT: or a3, a3, a6
 ; RV32I-NEXT: .LBB13_3:
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: andi t0, a6, 63
-; RV32I-NEXT: addi a7, t0, -32
-; RV32I-NEXT: and a2, a5, a4
-; RV32I-NEXT: bltz a7, .LBB13_5
+; RV32I-NEXT: srai t0, a5, 31
+; RV32I-NEXT: neg a5, a2
+; RV32I-NEXT: andi a7, a5, 63
+; RV32I-NEXT: addi a6, a7, -32
+; RV32I-NEXT: and a2, t0, a4
+; RV32I-NEXT: bltz a6, .LBB13_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a1, a0, t0
+; RV32I-NEXT: sll a1, a0, a7
 ; RV32I-NEXT: j .LBB13_6
 ; RV32I-NEXT: .LBB13_5:
-; RV32I-NEXT: sll a1, a1, a6
-; RV32I-NEXT: not a4, t0
-; RV32I-NEXT: srli a5, a0, 1
-; RV32I-NEXT: srl a4, a5, a4
+; RV32I-NEXT: sll a1, a1, a5
+; RV32I-NEXT: not a4, a7
+; RV32I-NEXT: srli a7, a0, 1
+; RV32I-NEXT: srl a4, a7, a4
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: .LBB13_6:
-; RV32I-NEXT: sll a0, a0, a6
-; RV32I-NEXT: slti a4, a7, 0
-; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: sll a0, a0, a5
+; RV32I-NEXT: srai a4, a6, 31
 ; RV32I-NEXT: and a0, a4, a0
 ; RV32I-NEXT: or a0, a3, a0
 ; RV32I-NEXT: or a1, a2, a1
@@ -1141,11 +1115,9 @@
 ;
 ; RV32ZBB-LABEL: rotr_64_mask:
 ; RV32ZBB: # %bb.0:
+; RV32ZBB-NEXT: addi a5, a2, -32
 ; RV32ZBB-NEXT: srl a4, a1, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a5, a3, 0
-; RV32ZBB-NEXT: neg a5, a5
-; RV32ZBB-NEXT: bltz a3, .LBB13_2
+; RV32ZBB-NEXT: bltz a5, .LBB13_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a4
 ; RV32ZBB-NEXT: j .LBB13_3
@@ -1156,24 +1128,24 @@
 ; RV32ZBB-NEXT: sll a6, a7, a6
 ; RV32ZBB-NEXT: or a3, a3, a6
 ; RV32ZBB-NEXT: .LBB13_3:
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: andi t0, a6, 63
-; RV32ZBB-NEXT: addi a7, t0, -32
-; RV32ZBB-NEXT: and a2, a5, a4
-; RV32ZBB-NEXT: bltz a7, .LBB13_5
+; RV32ZBB-NEXT: srai t0, a5, 31
+; RV32ZBB-NEXT: neg a5, a2
+; RV32ZBB-NEXT: andi a7, a5, 63
+; RV32ZBB-NEXT: addi a6, a7, -32
+; RV32ZBB-NEXT: and a2, t0, a4
+; RV32ZBB-NEXT: bltz a6, .LBB13_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: sll a1, a0, t0
+; RV32ZBB-NEXT: sll a1, a0, a7
 ; RV32ZBB-NEXT: j .LBB13_6
 ; RV32ZBB-NEXT: .LBB13_5:
-; RV32ZBB-NEXT: sll a1, a1, a6
-; RV32ZBB-NEXT: not a4, t0
-; RV32ZBB-NEXT: srli a5, a0, 1
-; RV32ZBB-NEXT: srl a4, a5, a4
+; RV32ZBB-NEXT: sll a1, a1, a5
+; RV32ZBB-NEXT: not a4, a7
+; RV32ZBB-NEXT: srli a7, a0, 1
+; RV32ZBB-NEXT: srl a4, a7, a4
 ; RV32ZBB-NEXT: or a1, a1, a4
 ; RV32ZBB-NEXT: .LBB13_6:
-; RV32ZBB-NEXT: sll a0, a0, a6
-; RV32ZBB-NEXT: slti a4, a7, 0
-; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: sll a0, a0, a5
+; RV32ZBB-NEXT: srai a4, a6, 31
 ; RV32ZBB-NEXT: and a0, a4, a0
 ; RV32ZBB-NEXT: or a0, a3, a0
 ; RV32ZBB-NEXT: or a1, a2, a1
@@ -1186,11 +1158,9 @@
 ;
 ; RV32XTHEADBB-LABEL: rotr_64_mask:
 ; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: addi a5, a2, -32
 ; RV32XTHEADBB-NEXT: srl a4, a1, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a5, a3, 0
-; RV32XTHEADBB-NEXT: neg a5, a5
-; RV32XTHEADBB-NEXT: bltz a3, .LBB13_2
+; RV32XTHEADBB-NEXT: bltz a5, .LBB13_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a4
 ; RV32XTHEADBB-NEXT: j .LBB13_3
@@ -1201,24 +1171,24 @@
 ; RV32XTHEADBB-NEXT: sll a6, a7, a6
 ; RV32XTHEADBB-NEXT: or a3, a3, a6
 ; RV32XTHEADBB-NEXT: .LBB13_3:
-; RV32XTHEADBB-NEXT: neg a6, a2
-; RV32XTHEADBB-NEXT: andi t0, a6, 63
-; RV32XTHEADBB-NEXT: addi a7, t0, -32
-; RV32XTHEADBB-NEXT: and a2, a5, a4
-; RV32XTHEADBB-NEXT: bltz a7, .LBB13_5
+; RV32XTHEADBB-NEXT: srai t0, a5, 31
+; RV32XTHEADBB-NEXT: neg a5, a2
+; RV32XTHEADBB-NEXT: andi a7, a5, 63
+; RV32XTHEADBB-NEXT: addi a6, a7, -32
+; RV32XTHEADBB-NEXT: and a2, t0, a4
+; RV32XTHEADBB-NEXT: bltz a6, .LBB13_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: sll a1, a0, t0
+; RV32XTHEADBB-NEXT: sll a1, a0, a7
 ; RV32XTHEADBB-NEXT: j .LBB13_6
 ; RV32XTHEADBB-NEXT: .LBB13_5:
-; RV32XTHEADBB-NEXT: sll a1, a1, a6
-; RV32XTHEADBB-NEXT: not a4, t0
-; RV32XTHEADBB-NEXT: srli a5, a0, 1
-; RV32XTHEADBB-NEXT: srl a4, a5, a4
+; RV32XTHEADBB-NEXT: sll a1, a1, a5
+; RV32XTHEADBB-NEXT: not a4, a7
+; RV32XTHEADBB-NEXT: srli a7, a0, 1
+; RV32XTHEADBB-NEXT: srl a4, a7, a4
 ; RV32XTHEADBB-NEXT: or a1, a1, a4
 ; RV32XTHEADBB-NEXT: .LBB13_6:
-; RV32XTHEADBB-NEXT: sll a0, a0, a6
-; RV32XTHEADBB-NEXT: slti a4, a7, 0
-; RV32XTHEADBB-NEXT: neg a4, a4
+; RV32XTHEADBB-NEXT: sll a0, a0, a5
+; RV32XTHEADBB-NEXT: srai a4, a6, 31
 ; RV32XTHEADBB-NEXT: and a0, a4, a0
 ; RV32XTHEADBB-NEXT: or a0, a3, a0
 ; RV32XTHEADBB-NEXT: or a1, a2, a1
@@ -1242,41 +1212,39 @@
 define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
 ; RV32I-LABEL: rotr_64_mask_and_127_and_63:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: srl a4, a1, a2
 ; RV32I-NEXT: andi a3, a2, 127
-; RV32I-NEXT: addi a6, a3, -32
-; RV32I-NEXT: slti a5, a6, 0
-; RV32I-NEXT: neg a5, a5
-; RV32I-NEXT: bltz a6, .LBB14_2
+; RV32I-NEXT: addi a4, a3, -32
+; RV32I-NEXT: bltz a4, .LBB14_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: srl a3, a1, a3
 ; RV32I-NEXT: j .LBB14_3
 ; RV32I-NEXT: .LBB14_2:
-; RV32I-NEXT: srl a6, a0, a2
-; RV32I-NEXT: slli a7, a1, 1
+; RV32I-NEXT: srl a5, a0, a2
+; RV32I-NEXT: slli a6, a1, 1
 ; RV32I-NEXT: not a3, a3
-; RV32I-NEXT: sll a3, a7, a3
-; RV32I-NEXT: or a3, a6, a3
+; RV32I-NEXT: sll a3, a6, a3
+; RV32I-NEXT: or a3, a5, a3
 ; RV32I-NEXT: .LBB14_3:
-; RV32I-NEXT: neg a6, a2
-; RV32I-NEXT: andi t0, a6, 63
-; RV32I-NEXT: addi a7, t0, -32
-; RV32I-NEXT: and a2, a5, a4
-; RV32I-NEXT: bltz a7, .LBB14_5
+; RV32I-NEXT: srl a7, a1, a2
+; RV32I-NEXT: srai t0, a4, 31
+; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: andi a6, a4, 63
+; RV32I-NEXT: addi a5, a6, -32
+; RV32I-NEXT: and a2, t0, a7
+; RV32I-NEXT: bltz a5, .LBB14_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: sll a1, a0, t0
+; RV32I-NEXT: sll a1, a0, a6
 ; RV32I-NEXT: j .LBB14_6
 ; RV32I-NEXT: .LBB14_5:
-; RV32I-NEXT: sll a1, a1, a6
-; RV32I-NEXT: not a4, t0
-; RV32I-NEXT: srli a5, a0, 1
-; RV32I-NEXT: srl a4, a5, a4
-; RV32I-NEXT: or a1, a1, a4
+; RV32I-NEXT: sll a1, a1, a4
+; RV32I-NEXT: not a6, a6
+; RV32I-NEXT: srli a7, a0, 1
+; RV32I-NEXT: srl a6, a7, a6
+; RV32I-NEXT: or a1, a1, a6
 ; RV32I-NEXT: .LBB14_6:
-; RV32I-NEXT: sll a0, a0, a6
-; RV32I-NEXT: slti a4, a7, 0
-; RV32I-NEXT: neg a4, a4
-; RV32I-NEXT: and a0, a4, a0
+; RV32I-NEXT: sll a0, a0, a4
+; RV32I-NEXT: srai a5, a5, 31
+; RV32I-NEXT: and a0, a5, a0
 ; RV32I-NEXT: or a0, a3, a0
 ; RV32I-NEXT: or a1, a2, a1
 ; RV32I-NEXT: ret
@@ -1291,41 +1259,39 @@
 ;
 ; RV32ZBB-LABEL: rotr_64_mask_and_127_and_63:
 ; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: srl a4, a1, a2
 ; RV32ZBB-NEXT: andi a3, a2, 127
-; RV32ZBB-NEXT: addi a6, a3, -32
-; RV32ZBB-NEXT: slti a5, a6, 0
-; RV32ZBB-NEXT: neg a5, a5
-; RV32ZBB-NEXT: bltz a6, .LBB14_2
+; RV32ZBB-NEXT: addi a4, a3, -32
+; RV32ZBB-NEXT: bltz a4, .LBB14_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: srl a3, a1, a3
 ; RV32ZBB-NEXT: j .LBB14_3
 ; RV32ZBB-NEXT: .LBB14_2:
-; RV32ZBB-NEXT: srl a6, a0, a2
-; RV32ZBB-NEXT: slli a7, a1, 1
+; RV32ZBB-NEXT: srl a5, a0, a2
+; RV32ZBB-NEXT: slli a6, a1, 1
 ; RV32ZBB-NEXT: not a3, a3
-; RV32ZBB-NEXT: sll a3, a7, a3
-; RV32ZBB-NEXT: or a3, a6, a3
+; RV32ZBB-NEXT: sll a3, a6, a3
+; RV32ZBB-NEXT: or a3, a5, a3
 ; RV32ZBB-NEXT: .LBB14_3:
-; RV32ZBB-NEXT: neg a6, a2
-; RV32ZBB-NEXT: andi t0, a6, 63
-; RV32ZBB-NEXT: addi a7, t0, -32
-; RV32ZBB-NEXT: and a2, a5, a4
-; RV32ZBB-NEXT: bltz a7, .LBB14_5
+; RV32ZBB-NEXT: srl a7, a1, a2
+; RV32ZBB-NEXT: srai t0, a4, 31
+; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: andi a6, a4, 63
+; RV32ZBB-NEXT: addi a5, a6, -32
+; RV32ZBB-NEXT: and a2, t0, a7
+; RV32ZBB-NEXT: bltz a5, .LBB14_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: sll a1, a0, t0
+; RV32ZBB-NEXT: sll a1, a0, a6
 ; RV32ZBB-NEXT: j .LBB14_6
 ; RV32ZBB-NEXT: .LBB14_5:
-; RV32ZBB-NEXT: sll a1, a1, a6
-; RV32ZBB-NEXT: not a4, t0
-; RV32ZBB-NEXT: srli a5, a0, 1
-; RV32ZBB-NEXT: srl a4, a5, a4
-; RV32ZBB-NEXT: or a1, a1, a4
+; RV32ZBB-NEXT: sll a1, a1, a4
+; RV32ZBB-NEXT: not a6, a6
+; RV32ZBB-NEXT: srli a7, a0, 1
+; RV32ZBB-NEXT: srl a6, a7, a6
+; RV32ZBB-NEXT: or a1, a1, a6
 ; RV32ZBB-NEXT: .LBB14_6:
-; RV32ZBB-NEXT: sll a0, a0, a6
-; RV32ZBB-NEXT: slti a4, a7, 0
-; RV32ZBB-NEXT: neg a4, a4
-; RV32ZBB-NEXT: and a0, a4, a0
+; RV32ZBB-NEXT: sll a0, a0, a4
+; RV32ZBB-NEXT: srai a5, a5, 31
+; RV32ZBB-NEXT: and a0, a5, a0
 ; RV32ZBB-NEXT: or a0, a3, a0
 ; RV32ZBB-NEXT: or a1, a2, a1
 ; RV32ZBB-NEXT: ret
@@ -1337,41 +1303,39 @@
 ;
 ; RV32XTHEADBB-LABEL: rotr_64_mask_and_127_and_63:
 ; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: srl a4, a1, a2
 ; RV32XTHEADBB-NEXT: andi a3, a2, 127
-; RV32XTHEADBB-NEXT: addi a6, a3, -32
-; RV32XTHEADBB-NEXT: slti a5, a6, 0
-; RV32XTHEADBB-NEXT: neg a5, a5
-; RV32XTHEADBB-NEXT: bltz a6, .LBB14_2
+; RV32XTHEADBB-NEXT: addi a4, a3, -32
+; RV32XTHEADBB-NEXT: bltz a4, .LBB14_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: srl a3, a1, a3
 ; RV32XTHEADBB-NEXT: j .LBB14_3
 ; RV32XTHEADBB-NEXT: .LBB14_2:
-; RV32XTHEADBB-NEXT: srl a6, a0, a2
-; RV32XTHEADBB-NEXT: slli a7, a1, 1
+; RV32XTHEADBB-NEXT: srl a5, a0, a2
+; RV32XTHEADBB-NEXT: slli a6, a1, 1
 ; RV32XTHEADBB-NEXT: not a3, a3
-; RV32XTHEADBB-NEXT: sll a3, a7, a3
-; RV32XTHEADBB-NEXT: or a3, a6, a3
+; RV32XTHEADBB-NEXT: sll a3, a6, a3
+; RV32XTHEADBB-NEXT: or a3, a5, a3
 ; RV32XTHEADBB-NEXT: .LBB14_3:
-; RV32XTHEADBB-NEXT: neg a6, a2
-; RV32XTHEADBB-NEXT: andi t0, a6, 63
-; RV32XTHEADBB-NEXT: addi a7, t0, -32
-; RV32XTHEADBB-NEXT: and a2, a5, a4
-; RV32XTHEADBB-NEXT: bltz a7, .LBB14_5
+; RV32XTHEADBB-NEXT: srl a7, a1, a2
+; RV32XTHEADBB-NEXT: srai t0, a4, 31
+; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: andi a6, a4, 63
+; RV32XTHEADBB-NEXT: addi a5, a6, -32
+; RV32XTHEADBB-NEXT: and a2, t0, a7
+; RV32XTHEADBB-NEXT: bltz a5, .LBB14_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: sll a1, a0, t0
+; RV32XTHEADBB-NEXT: sll a1, a0, a6
 ; RV32XTHEADBB-NEXT: j .LBB14_6
 ; RV32XTHEADBB-NEXT: .LBB14_5:
-; RV32XTHEADBB-NEXT: sll a1, a1, a6
-; RV32XTHEADBB-NEXT: not a4, t0
-; RV32XTHEADBB-NEXT: srli a5, a0, 1
-; RV32XTHEADBB-NEXT: srl a4, a5, a4
-; RV32XTHEADBB-NEXT: or a1, a1, a4
+; RV32XTHEADBB-NEXT: sll a1, a1, a4
+; RV32XTHEADBB-NEXT: not a6, a6
+; RV32XTHEADBB-NEXT: srli a7, a0, 1
+; RV32XTHEADBB-NEXT: srl a6, a7, a6
+; RV32XTHEADBB-NEXT: or a1, a1, a6
 ; RV32XTHEADBB-NEXT: .LBB14_6:
-; RV32XTHEADBB-NEXT: sll a0, a0, a6
-; RV32XTHEADBB-NEXT: slti a4, a7, 0
-; RV32XTHEADBB-NEXT: neg a4, a4
-; RV32XTHEADBB-NEXT: and a0, a4, a0
+; RV32XTHEADBB-NEXT: sll a0, a0, a4
+; RV32XTHEADBB-NEXT: srai a5, a5, 31
+; RV32XTHEADBB-NEXT: and a0, a5, a0
 ; RV32XTHEADBB-NEXT: or a0, a3, a0
 ; RV32XTHEADBB-NEXT: or a1, a2, a1
 ; RV32XTHEADBB-NEXT: ret
@@ -1550,8 +1514,7 @@
 ; RV32I-NEXT: or a3, a3, a6
 ; RV32I-NEXT: .LBB17_7:
 ; RV32I-NEXT: sll a2, a2, a4
-; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: srai a0, a0, 31
 ; RV32I-NEXT: and a0, a0, a2
 ; RV32I-NEXT: add a0, a1, a0
 ; RV32I-NEXT: sltu a1, a0, a1
@@ -1605,8 +1568,7 @@
 ; RV32ZBB-NEXT: or a3, a3, a6
 ; RV32ZBB-NEXT: .LBB17_7:
 ; RV32ZBB-NEXT: sll a2, a2, a4
-; RV32ZBB-NEXT: slti a0, a0, 0
-; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: srai a0, a0, 31
 ; RV32ZBB-NEXT: and a0, a0, a2
 ; RV32ZBB-NEXT: add a0, a1, a0
 ; RV32ZBB-NEXT: sltu a1, a0, a1
@@ -1656,8 +1618,7 @@
 ; RV32XTHEADBB-NEXT: or a3, a3, a6
 ; RV32XTHEADBB-NEXT: .LBB17_7:
 ; RV32XTHEADBB-NEXT: sll a2, a2, a4
-; RV32XTHEADBB-NEXT: slti a0, a0, 0
-; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: srai a0, a0, 31
 ; RV32XTHEADBB-NEXT: and a0, a0, a2
 ; RV32XTHEADBB-NEXT: add a0, a1, a0
 ; RV32XTHEADBB-NEXT: sltu a1, a0, a1
@@ -1782,8 +1743,7 @@
 ; RV32I-NEXT: or a3, a3, a5
 ; RV32I-NEXT: .LBB19_7:
 ; RV32I-NEXT: sll a2, a2, a4
-; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: srai a0, a0, 31
 ; RV32I-NEXT: and a0, a0, a2
 ; RV32I-NEXT: add a0, a6, a0
 ; RV32I-NEXT: sltu a2, a0, a6
@@ -1836,8 +1796,7 @@
 ; RV32ZBB-NEXT: or a3, a3, a5
 ; RV32ZBB-NEXT: .LBB19_7:
 ; RV32ZBB-NEXT: sll a2, a2, a4
-; RV32ZBB-NEXT: slti a0, a0, 0
-; RV32ZBB-NEXT: neg a0, a0
+; RV32ZBB-NEXT: srai a0, a0, 31
 ; RV32ZBB-NEXT: and a0, a0, a2
 ; RV32ZBB-NEXT: add a0, a6, a0
 ; RV32ZBB-NEXT: sltu a2, a0, a6
@@ -1887,8 +1846,7 @@
 ; RV32XTHEADBB-NEXT: or a3, a3, a5
 ; RV32XTHEADBB-NEXT: .LBB19_7:
 ; RV32XTHEADBB-NEXT: sll a2, a2, a4
-; RV32XTHEADBB-NEXT: slti a0, a0, 0
-; RV32XTHEADBB-NEXT: neg a0, a0
+; RV32XTHEADBB-NEXT: srai a0, a0, 31
 ; RV32XTHEADBB-NEXT: and a0, a0, a2
 ; RV32XTHEADBB-NEXT: add a0, a6, a0
 ; RV32XTHEADBB-NEXT: sltu a2, a0, a6
@@ -2404,11 +2362,9 @@
 ; RV32I-LABEL: rotl_64_zext:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: addi a6, a2, -32
 ; RV32I-NEXT: sll a5, a0, a2
-; RV32I-NEXT: addi a3, a2, -32
-; RV32I-NEXT: slti a6, a3, 0
-; RV32I-NEXT: neg a6, a6
-; RV32I-NEXT: bltz a3, .LBB24_2
+; RV32I-NEXT: bltz a6, .LBB24_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a5
 ; RV32I-NEXT: j .LBB24_3
@@ -2419,13 +2375,14 @@
 ; RV32I-NEXT: srl a7, t0, a7
 ; RV32I-NEXT: or a3, a3, a7
 ; RV32I-NEXT: .LBB24_3:
+; RV32I-NEXT: srai a6, a6, 31
 ; RV32I-NEXT: and a5, a6, a5
 ; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub a7, a6, a2
-; RV32I-NEXT: srl a6, a1, a4
-; RV32I-NEXT: bltz a7, .LBB24_5
+; RV32I-NEXT: sub a6, a6, a2
+; RV32I-NEXT: srl a7, a1, a4
+; RV32I-NEXT: bltz a6, .LBB24_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a0, a6
+; RV32I-NEXT: mv a0, a7
 ; RV32I-NEXT: j .LBB24_6
 ; RV32I-NEXT: .LBB24_5:
 ; RV32I-NEXT: li t0, 64
@@ -2436,9 +2393,8 @@
 ; RV32I-NEXT: sll a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: .LBB24_6:
-; RV32I-NEXT: slti a1, a7, 0
-; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: and a1, a1, a6
+; RV32I-NEXT: srai a1, a6, 31
+; RV32I-NEXT: and a1, a1, a7
 ; RV32I-NEXT: or a1, a3, a1
 ; RV32I-NEXT: or a0, a5, a0
 ; RV32I-NEXT: ret
@@ -2454,11 +2410,9 @@
 ; RV32ZBB-LABEL: rotl_64_zext:
 ; RV32ZBB: # %bb.0:
 ; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: addi a6, a2, -32
 ; RV32ZBB-NEXT: sll a5, a0, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a6, a3, 0
-; RV32ZBB-NEXT: neg a6, a6
-; RV32ZBB-NEXT: bltz a3, .LBB24_2
+; RV32ZBB-NEXT: bltz a6, .LBB24_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a5
 ; RV32ZBB-NEXT: j .LBB24_3
@@ -2469,13 +2423,14 @@
 ; RV32ZBB-NEXT: srl a7, t0, a7
 ; RV32ZBB-NEXT: or a3, a3, a7
 ; RV32ZBB-NEXT: .LBB24_3:
+; RV32ZBB-NEXT: srai a6, a6, 31
 ; RV32ZBB-NEXT: and a5, a6, a5
 ; RV32ZBB-NEXT: li a6, 32
-; RV32ZBB-NEXT: sub a7, a6, a2
-; RV32ZBB-NEXT: srl a6, a1, a4
-; RV32ZBB-NEXT: bltz a7, .LBB24_5
+; RV32ZBB-NEXT: sub a6, a6, a2
+; RV32ZBB-NEXT: srl a7, a1, a4
+; RV32ZBB-NEXT: bltz a6, .LBB24_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: mv a0, a6
+; RV32ZBB-NEXT: mv a0, a7
 ; RV32ZBB-NEXT: j .LBB24_6
 ; RV32ZBB-NEXT: .LBB24_5:
 ; RV32ZBB-NEXT: li t0, 64
@@ -2486,9 +2441,8 @@
 ; RV32ZBB-NEXT: sll a1, a1, a2
 ; RV32ZBB-NEXT: or a0, a0, a1
 ; RV32ZBB-NEXT: .LBB24_6:
-; RV32ZBB-NEXT: slti a1, a7, 0
-; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: and a1, a1, a6
+; RV32ZBB-NEXT: srai a1, a6, 31
+; RV32ZBB-NEXT: and a1, a1, a7
 ; RV32ZBB-NEXT: or a1, a3, a1
 ; RV32ZBB-NEXT: or a0, a5, a0
 ; RV32ZBB-NEXT: ret
@@ -2501,11 +2455,9 @@
 ; RV32XTHEADBB-LABEL: rotl_64_zext:
 ; RV32XTHEADBB: # %bb.0:
 ; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: addi a6, a2, -32
 ; RV32XTHEADBB-NEXT: sll a5, a0, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a6, a3, 0
-; RV32XTHEADBB-NEXT: neg a6, a6
-; RV32XTHEADBB-NEXT: bltz a3, .LBB24_2
+; RV32XTHEADBB-NEXT: bltz a6, .LBB24_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a5
 ; RV32XTHEADBB-NEXT: j .LBB24_3
@@ -2516,13 +2468,14 @@
 ; RV32XTHEADBB-NEXT: srl a7, t0, a7
 ; RV32XTHEADBB-NEXT: or a3, a3, a7
 ; RV32XTHEADBB-NEXT: .LBB24_3:
+; RV32XTHEADBB-NEXT: srai a6, a6, 31
 ; RV32XTHEADBB-NEXT: and a5, a6, a5
 ; RV32XTHEADBB-NEXT: li a6, 32
-; RV32XTHEADBB-NEXT: sub a7, a6, a2
-; RV32XTHEADBB-NEXT: srl a6, a1, a4
-; RV32XTHEADBB-NEXT: bltz a7, .LBB24_5
+; RV32XTHEADBB-NEXT: sub a6, a6, a2
+; RV32XTHEADBB-NEXT: srl a7, a1, a4
+; RV32XTHEADBB-NEXT: bltz a6, .LBB24_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: mv a0, a6
+; RV32XTHEADBB-NEXT: mv a0, a7
 ; RV32XTHEADBB-NEXT: j .LBB24_6
 ; RV32XTHEADBB-NEXT: .LBB24_5:
 ; RV32XTHEADBB-NEXT: li t0, 64
@@ -2533,9 +2486,8 @@
 ; RV32XTHEADBB-NEXT: sll a1, a1, a2
 ; RV32XTHEADBB-NEXT: or a0, a0, a1
 ; RV32XTHEADBB-NEXT: .LBB24_6:
-; RV32XTHEADBB-NEXT: slti a1, a7, 0
-; RV32XTHEADBB-NEXT: neg a1, a1
-; RV32XTHEADBB-NEXT: and a1, a1, a6
+; RV32XTHEADBB-NEXT: srai a1, a6, 31
+; RV32XTHEADBB-NEXT: and a1, a1, a7
 ; RV32XTHEADBB-NEXT: or a1, a3, a1
 ; RV32XTHEADBB-NEXT: or a0, a5, a0
 ; RV32XTHEADBB-NEXT: ret
@@ -2560,11 +2512,9 @@
 ; RV32I-LABEL: rotr_64_zext:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: addi a6, a2, -32
 ; RV32I-NEXT: srl a5, a1, a2
-; RV32I-NEXT: addi a3, a2, -32
-; RV32I-NEXT: slti a6, a3, 0
-; RV32I-NEXT: neg a6, a6
-; RV32I-NEXT: bltz a3, .LBB25_2
+; RV32I-NEXT: bltz a6, .LBB25_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a5
 ; RV32I-NEXT: j .LBB25_3
@@ -2575,13 +2525,14 @@
 ; RV32I-NEXT: sll a7, t0, a7
 ; RV32I-NEXT: or a3, a3, a7
 ; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: srai a6, a6, 31
 ; RV32I-NEXT: and a5, a6, a5
 ; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub a7, a6, a2
-; RV32I-NEXT: sll a6, a0, a4
-; RV32I-NEXT: bltz a7, .LBB25_5
+; RV32I-NEXT: sub a6, a6, a2
+; RV32I-NEXT: sll a7, a0, a4
+; RV32I-NEXT: bltz a6, .LBB25_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: mv a1, a7
 ; RV32I-NEXT: j .LBB25_6
 ; RV32I-NEXT: .LBB25_5:
 ; RV32I-NEXT: li t0, 64
@@ -2592,9 +2543,8 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB25_6:
-; RV32I-NEXT: slti a0, a7, 0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: srai a0, a6, 31
+; RV32I-NEXT: and a0, a0, a7
 ; RV32I-NEXT: or a0, a3, a0
 ; RV32I-NEXT: or a1, a5, a1
 ; RV32I-NEXT: ret
@@ -2610,11 +2560,9 @@
 ; RV32ZBB-LABEL: rotr_64_zext:
 ; RV32ZBB: # %bb.0:
 ; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: addi a6, a2, -32
 ; RV32ZBB-NEXT: srl a5, a1, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a6, a3, 0
-; RV32ZBB-NEXT: neg a6, a6
-; RV32ZBB-NEXT: bltz a3, .LBB25_2
+; RV32ZBB-NEXT: bltz a6, .LBB25_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a5
 ; RV32ZBB-NEXT: j .LBB25_3
@@ -2625,13 +2573,14 @@
 ; RV32ZBB-NEXT: sll a7, t0, a7
 ; RV32ZBB-NEXT: or a3, a3, a7
 ; RV32ZBB-NEXT: .LBB25_3:
+; RV32ZBB-NEXT: srai a6, a6, 31
 ; RV32ZBB-NEXT: and a5, a6, a5
 ; RV32ZBB-NEXT: li a6, 32
-; RV32ZBB-NEXT: sub a7, a6, a2
-; RV32ZBB-NEXT: sll a6, a0, a4
-; RV32ZBB-NEXT: bltz a7, .LBB25_5
+; RV32ZBB-NEXT: sub a6, a6, a2
+; RV32ZBB-NEXT: sll a7, a0, a4
+; RV32ZBB-NEXT: bltz a6, .LBB25_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: mv a1, a6
+; RV32ZBB-NEXT: mv a1, a7
 ; RV32ZBB-NEXT: j .LBB25_6
 ; RV32ZBB-NEXT: .LBB25_5:
 ; RV32ZBB-NEXT: li t0, 64
@@ -2642,9 +2591,8 @@
 ; RV32ZBB-NEXT: srl a0, a0, a2
 ; RV32ZBB-NEXT: or a1, a1, a0
 ; RV32ZBB-NEXT: .LBB25_6:
-; RV32ZBB-NEXT: slti a0, a7, 0
-; RV32ZBB-NEXT: neg a0, a0
-; RV32ZBB-NEXT: and a0, a0, a6
+; RV32ZBB-NEXT: srai a0, a6, 31
+; RV32ZBB-NEXT: and a0, a0, a7
 ; RV32ZBB-NEXT: or a0, a3, a0
 ; RV32ZBB-NEXT: or a1, a5, a1
 ; RV32ZBB-NEXT: ret
@@ -2657,11 +2605,9 @@
 ; RV32XTHEADBB-LABEL: rotr_64_zext:
 ; RV32XTHEADBB: # %bb.0:
 ; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: addi a6, a2, -32
 ; RV32XTHEADBB-NEXT: srl a5, a1, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a6, a3, 0
-; RV32XTHEADBB-NEXT: neg a6, a6
-; RV32XTHEADBB-NEXT: bltz a3, .LBB25_2
+; RV32XTHEADBB-NEXT: bltz a6, .LBB25_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a5
 ; RV32XTHEADBB-NEXT: j .LBB25_3
@@ -2672,13 +2618,14 @@
 ; RV32XTHEADBB-NEXT: sll a7, t0, a7
 ; RV32XTHEADBB-NEXT: or a3, a3, a7
 ; RV32XTHEADBB-NEXT: .LBB25_3:
+; RV32XTHEADBB-NEXT: srai a6, a6, 31
 ; RV32XTHEADBB-NEXT: and a5, a6, a5
 ; RV32XTHEADBB-NEXT: li a6, 32
-; RV32XTHEADBB-NEXT: sub a7, a6, a2
-; RV32XTHEADBB-NEXT: sll a6, a0, a4
-; RV32XTHEADBB-NEXT: bltz a7, .LBB25_5
+; RV32XTHEADBB-NEXT: sub a6, a6, a2
+; RV32XTHEADBB-NEXT: sll a7, a0, a4
+; RV32XTHEADBB-NEXT: bltz a6, .LBB25_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: mv a1, a6
+; RV32XTHEADBB-NEXT: mv a1, a7
 ; RV32XTHEADBB-NEXT: j .LBB25_6
 ; RV32XTHEADBB-NEXT: .LBB25_5:
 ; RV32XTHEADBB-NEXT: li t0, 64
@@ -2689,9 +2636,8 @@
 ; RV32XTHEADBB-NEXT: srl a0, a0, a2
 ; RV32XTHEADBB-NEXT: or a1, a1, a0
 ; RV32XTHEADBB-NEXT: .LBB25_6:
-; RV32XTHEADBB-NEXT: slti a0, a7, 0
-; RV32XTHEADBB-NEXT: neg a0, a0
-; RV32XTHEADBB-NEXT: and a0, a0, a6
+; RV32XTHEADBB-NEXT: srai a0, a6, 31
+; RV32XTHEADBB-NEXT: and a0, a0, a7
 ; RV32XTHEADBB-NEXT: or a0, a3, a0
 ; RV32XTHEADBB-NEXT: or a1, a5, a1
 ; RV32XTHEADBB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -5442,15 +5442,13 @@
 ; CHECK-NOV-NEXT: .LBB45_6: # %entry
 ; CHECK-NOV-NEXT: mv s0, a4
 ; CHECK-NOV-NEXT: .LBB45_7: # %entry
-; CHECK-NOV-NEXT: slti a6, a1, 0
-; CHECK-NOV-NEXT: slti a3, s1, 0
-; CHECK-NOV-NEXT: neg a4, a3
+; CHECK-NOV-NEXT: srai a4, s1, 63
 ; CHECK-NOV-NEXT: and a4, a4, s1
 ; CHECK-NOV-NEXT: slli a3, a0, 63
 ; CHECK-NOV-NEXT: mv a5, s0
 ; CHECK-NOV-NEXT: bltz a4, .LBB45_20
 ; CHECK-NOV-NEXT: # %bb.8: # %entry
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: srai a6, a1, 63
 ; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_21
 ; CHECK-NOV-NEXT: .LBB45_9: # %entry
 ; CHECK-NOV-NEXT: and a1, a6, a1
@@ -5492,7 +5490,7 @@
 ; CHECK-NOV-NEXT: j .LBB45_7
 ; CHECK-NOV-NEXT: .LBB45_20: # %entry
 ; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: srai a6, a1, 63
 ; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_9
 ; CHECK-NOV-NEXT: .LBB45_21: # %entry
 ; CHECK-NOV-NEXT: mv s0, a3
@@ -5555,15 +5553,13 @@
 ; CHECK-V-NEXT: .LBB45_6: # %entry
 ; CHECK-V-NEXT: mv a0, a4
 ; CHECK-V-NEXT: .LBB45_7: # %entry
-; CHECK-V-NEXT: slti a3, s1, 0
-; CHECK-V-NEXT: neg a4, a3
+; CHECK-V-NEXT: srai a4, s1, 63
 ; CHECK-V-NEXT: and a4, a4, s1
-; CHECK-V-NEXT: slti a6, a1, 0
 ; CHECK-V-NEXT: slli a3, a2, 63
 ; CHECK-V-NEXT: mv a5, s0
 ; CHECK-V-NEXT: bltz a4, .LBB45_20
 ; CHECK-V-NEXT: # %bb.8: # %entry
-; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: srai a6, a1, 63
 ; CHECK-V-NEXT: bgeu a3, s0, .LBB45_21
 ; CHECK-V-NEXT: .LBB45_9: # %entry
 ; CHECK-V-NEXT: and a1, a6, a1
@@ -5609,7 +5605,7 @@
 ; CHECK-V-NEXT: j .LBB45_7
 ; CHECK-V-NEXT: .LBB45_20: # %entry
 ; CHECK-V-NEXT: mv a5, a3
-; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: srai a6, a1, 63
 ; CHECK-V-NEXT: bltu a3, s0, .LBB45_9
 ; CHECK-V-NEXT: .LBB45_21: # %entry
 ; CHECK-V-NEXT: mv s0, a3
@@ -5917,15 +5913,13 @@
 ; CHECK-NOV-NEXT: .LBB48_6: # %entry
 ; CHECK-NOV-NEXT: mv s0, a4
 ; CHECK-NOV-NEXT: .LBB48_7: # %entry
-; CHECK-NOV-NEXT: slti a6, a1, 0
-; CHECK-NOV-NEXT: slti a3, s1, 0
-; CHECK-NOV-NEXT: neg a4, a3
+; CHECK-NOV-NEXT: srai a4, s1, 63
 ; CHECK-NOV-NEXT: and a4, a4, s1
 ; CHECK-NOV-NEXT: slli a3, a0, 63
 ; CHECK-NOV-NEXT: mv a5, s0
 ; CHECK-NOV-NEXT: bltz a4, .LBB48_20
 ; CHECK-NOV-NEXT: # %bb.8: # %entry
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: srai a6, a1, 63
 ; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_21
 ; CHECK-NOV-NEXT: .LBB48_9: # %entry
 ; CHECK-NOV-NEXT: and a1, a6, a1
@@ -5967,7 +5961,7 @@
 ; CHECK-NOV-NEXT: j .LBB48_7
 ; CHECK-NOV-NEXT: .LBB48_20: # %entry
 ; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: srai a6, a1, 63
 ; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_9
 ; CHECK-NOV-NEXT: .LBB48_21: # %entry
 ; CHECK-NOV-NEXT: mv s0, a3
@@ -6030,15 +6024,13 @@
 ; CHECK-V-NEXT: .LBB48_6: # %entry
 ; CHECK-V-NEXT: mv a0, a4
 ; CHECK-V-NEXT: .LBB48_7: # %entry
-; CHECK-V-NEXT: slti a3, s1, 0
-; CHECK-V-NEXT: neg a4, a3
+; CHECK-V-NEXT: srai a4, s1, 63
 ; CHECK-V-NEXT: and a4, a4, s1
-; CHECK-V-NEXT: slti a6, a1, 0
 ; CHECK-V-NEXT: slli a3, a2, 63
 ; CHECK-V-NEXT: mv a5, s0
 ; CHECK-V-NEXT: bltz a4, .LBB48_20
 ; CHECK-V-NEXT: # %bb.8: # %entry
-; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: srai a6, a1, 63
 ; CHECK-V-NEXT: bgeu a3, s0, .LBB48_21
 ; CHECK-V-NEXT: .LBB48_9: # %entry
 ; CHECK-V-NEXT: and a1, a6, a1
@@ -6084,7 +6076,7 @@
 ; CHECK-V-NEXT: j .LBB48_7
 ; CHECK-V-NEXT: .LBB48_20: # %entry
 ; CHECK-V-NEXT: mv a5, a3
-; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: srai a6, a1, 63
 ; CHECK-V-NEXT: bltu a3, s0, .LBB48_9
 ; CHECK-V-NEXT: .LBB48_21: # %entry
 ; CHECK-V-NEXT: mv s0, a3
@@ -6394,15 +6386,13 @@
 ; CHECK-NOV-NEXT: .LBB51_6: # %entry
 ; CHECK-NOV-NEXT: mv s0, a4
 ; CHECK-NOV-NEXT: .LBB51_7: # %entry
-; CHECK-NOV-NEXT: slti a6, a1, 0
-; CHECK-NOV-NEXT: slti a3, s1, 0
-; CHECK-NOV-NEXT: neg a4, a3
+; CHECK-NOV-NEXT: srai a4, s1, 63
 ; CHECK-NOV-NEXT: and a4, a4, s1
 ; CHECK-NOV-NEXT: slli a3, a0, 63
 ; CHECK-NOV-NEXT: mv a5, s0
 ; CHECK-NOV-NEXT: bltz a4, .LBB51_20
 ; CHECK-NOV-NEXT: # %bb.8: # %entry
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: srai a6, a1, 63
 ; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_21
 ; CHECK-NOV-NEXT: .LBB51_9: # %entry
 ; CHECK-NOV-NEXT: and a1, a6, a1
@@ -6444,7 +6434,7 @@
 ; CHECK-NOV-NEXT: j .LBB51_7
 ; CHECK-NOV-NEXT: .LBB51_20: # %entry
 ; CHECK-NOV-NEXT: mv a5, a3
-; CHECK-NOV-NEXT: neg a6, a6
+; CHECK-NOV-NEXT: srai a6, a1, 63
 ; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_9
 ; CHECK-NOV-NEXT: .LBB51_21: # %entry
 ; CHECK-NOV-NEXT: mv s0, a3
@@ -6500,15 +6490,13 @@
 ; CHECK-V-NEXT: .LBB51_6: # %entry
 ; CHECK-V-NEXT: mv s0, a4
 ; CHECK-V-NEXT: .LBB51_7: # %entry
-; CHECK-V-NEXT: slti a6, a1, 0
-; CHECK-V-NEXT: slti a3, s1, 0
-; CHECK-V-NEXT: neg a4, a3
+; CHECK-V-NEXT: srai a4, s1, 63
 ; CHECK-V-NEXT: and a4, a4, s1
 ; CHECK-V-NEXT: slli a3, a2, 63
 ; CHECK-V-NEXT: mv a5, s0
 ; CHECK-V-NEXT: bltz a4, .LBB51_20
 ; CHECK-V-NEXT: # %bb.8: # %entry
-; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: srai a6, a1, 63
 ; CHECK-V-NEXT: bgeu a3, s0, .LBB51_21
 ; CHECK-V-NEXT: .LBB51_9: # %entry
 ; CHECK-V-NEXT: and a1, a6, a1
@@ -6552,7 +6540,7 @@
 ; CHECK-V-NEXT: j .LBB51_7
 ; CHECK-V-NEXT: .LBB51_20: # %entry
 ; CHECK-V-NEXT: mv a5, a3
-; CHECK-V-NEXT: neg a6, a6
+; CHECK-V-NEXT: srai a6, a1, 63
 ; CHECK-V-NEXT: bltu a3, s0, .LBB51_9
 ; CHECK-V-NEXT: .LBB51_21: # %entry
 ; CHECK-V-NEXT: mv s0, a3
diff --git a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll
--- a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll
@@ -25,12 +25,12 @@
 ; RV32I-LABEL: shl_by_complemented_64:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: not a4, a0
-; RV32I-NEXT: li a3, 31
-; RV32I-NEXT: sub a3, a3, a0
-; RV32I-NEXT: sll a2, a0, a4
-; RV32I-NEXT: bltz a3, .LBB1_2
+; RV32I-NEXT: li a2, 31
+; RV32I-NEXT: sub a2, a2, a0
+; RV32I-NEXT: sll a3, a0, a4
+; RV32I-NEXT: bltz a2, .LBB1_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a1, a2
+; RV32I-NEXT: mv a1, a3
 ; RV32I-NEXT: j .LBB1_3
 ; RV32I-NEXT: .LBB1_2:
 ; RV32I-NEXT: sll a1, a1, a4
@@ -41,9 +41,8 @@
 ; RV32I-NEXT: srl a0, a0, a4
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: slti a0, a3, 0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: srai a0, a2, 31
+; RV32I-NEXT: and a0, a0, a3
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: shl_by_complemented_64:
@@ -77,12 +76,12 @@
 ; RV32I-LABEL: lshr_by_complemented_64:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: not a4, a0
-; RV32I-NEXT: li a3, 31
-; RV32I-NEXT: sub a3, a3, a0
-; RV32I-NEXT: srl a2, a1, a4
-; RV32I-NEXT: bltz a3, .LBB3_2
+; RV32I-NEXT: li a2, 31
+; RV32I-NEXT: sub a2, a2, a0
+; RV32I-NEXT: srl a3, a1, a4
+; RV32I-NEXT: bltz a2, .LBB3_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a0, a3
 ; RV32I-NEXT: j .LBB3_3
 ; RV32I-NEXT: .LBB3_2:
 ; RV32I-NEXT: srl a4, a0, a4
@@ -93,9 +92,8 @@
 ; RV32I-NEXT: sll a0, a1, a0
 ; RV32I-NEXT: or a0, a4, a0
 ; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: slti a1, a3, 0
-; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srai a1, a2, 31
+; RV32I-NEXT: and a1, a1, a3
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: lshr_by_complemented_64:
@@ -197,8 +195,7 @@
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: .LBB7_3:
 ; RV32I-NEXT: sll a0, a0, a3
-; RV32I-NEXT: slti a2, a2, 0
-; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: srai a2, a2, 31
 ; RV32I-NEXT: and a0, a2, a0
 ; RV32I-NEXT: ret
 ;
@@ -233,8 +230,7 @@
 ; RV32I-NEXT: or a0, a0, a4
 ; RV32I-NEXT: .LBB8_3:
 ; RV32I-NEXT: srl a1, a1, a3
-; RV32I-NEXT: slti a2, a2, 0
-; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: srai a2, a2, 31
 ; RV32I-NEXT: and a1, a2, a1
 ; RV32I-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
--- a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
+++ b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
@@ -175,9 +175,8 @@
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: .LBB9_3:
 ; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: slti a2, a3, 0
-; RV32I-NEXT: neg a2, a2
-; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: srai a3, a3, 31
+; RV32I-NEXT: and a0, a3, a0
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: sll_redundant_mask_zeros_i64:
@@ -209,9 +208,8 @@
 ; RV32I-NEXT: or a0, a0, a4
 ; RV32I-NEXT: .LBB10_3:
 ; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: slti a2, a3, 0
-; RV32I-NEXT: neg a2, a2
-; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: srai a3, a3, 31
+; RV32I-NEXT: and a1, a3, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: srl_redundant_mask_zeros_i64:
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -26,8 +26,7 @@
 ; RV32I-NEXT: sll a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: .LBB0_3:
-; RV32I-NEXT: slti a1, a4, 0
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: srai a1, a4, 31
 ; RV32I-NEXT: and a1, a1, a3
 ; RV32I-NEXT: ret
 ;
@@ -119,8 +118,7 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: slti a0, a4, 0
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: srai a0, a4, 31
 ; RV32I-NEXT: and a0, a0, a3
 ; RV32I-NEXT: ret
 ;
@@ -285,8 +283,7 @@
 ; RV64I-NEXT: sll a1, a1, a2
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: slti a1, a4, 0
-; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: srai a1, a4, 63
 ; RV64I-NEXT: and a1, a1, a3
 ; RV64I-NEXT: ret
 %1 = lshr i128 %a, %b
@@ -574,8 +571,7 @@
 ; RV64I-NEXT: srl a0, a0, a2
 ; RV64I-NEXT: or a1, a1, a0
 ; RV64I-NEXT: .LBB8_3:
-; RV64I-NEXT: slti a0, a4, 0
-; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: srai a0, a4, 63
 ; RV64I-NEXT: and a0, a0, a3
 ; RV64I-NEXT: ret
 %1 = shl i128 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -307,17 +307,16 @@
 ; RV32I-NEXT: sll a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a3, a1, 16
 ; RV32I-NEXT: sb a3, 6(a2)
 ; RV32I-NEXT: srli a3, a1, 24
 ; RV32I-NEXT: sb a3, 7(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 5(a2)
+; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a1, a0, 16
 ; RV32I-NEXT: sb a1, 2(a2)
 ; RV32I-NEXT: srli a1, a0, 24
@@ -444,23 +443,22 @@
 ; RV32I-NEXT: srl a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: srli a3, a0, 16
-; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
-; RV32I-NEXT: srli a0, a1, 16
-; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a1, 24
-; RV32I-NEXT: sb a0, 3(a2)
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: sb a3, 3(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: sb a1, 6(a2)
+; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: sb a1, 7(a2)
+; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: sb a0, 5(a2)
 ; RV32I-NEXT: ret
 %src = load i64, ptr %src.ptr, align 1
 %byteOff = load i64, ptr %byteOff.ptr, align 1
@@ -689,11 +687,9 @@
 ; RV64I-NEXT: sll a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 8(a2)
-; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a3, a1, 56
 ; RV64I-NEXT: sb a3, 15(a2)
 ; RV64I-NEXT: srli a3, a1, 48
@@ -708,6 +704,7 @@
 ; RV64I-NEXT: sb a3, 10(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 9(a2)
+; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a1, a0, 56
 ; RV64I-NEXT: sb a1, 7(a2)
 ; RV64I-NEXT: srli a1, a0, 48
@@ -909,39 +906,38 @@
 ; RV64I-NEXT: srl a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB7_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
-; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a0, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a0, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a0, 32
-; RV64I-NEXT: sb a3, 12(a2)
-; RV64I-NEXT: srli a3, a0, 24
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a0, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
-; RV64I-NEXT: srli a0, a1, 56
-; RV64I-NEXT: sb a0, 7(a2)
-; RV64I-NEXT: srli a0, a1, 48
-; RV64I-NEXT: sb a0, 6(a2)
-; RV64I-NEXT: srli a0, a1, 40
-; RV64I-NEXT: sb a0, 5(a2)
-; RV64I-NEXT: srli a0, a1, 32
-; RV64I-NEXT: sb a0, 4(a2)
-; RV64I-NEXT: srli a0, a1, 24
-; RV64I-NEXT: sb a0, 3(a2)
-; RV64I-NEXT: srli a0, a1, 16
-; RV64I-NEXT: sb a0, 2(a2)
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 48
+; RV64I-NEXT: sb a3, 6(a2)
+; RV64I-NEXT: srli a3, a1, 40
+; RV64I-NEXT: sb a3, 5(a2)
+; RV64I-NEXT: srli a3, a1, 32
+; RV64I-NEXT: sb a3, 4(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: sb a3, 3(a2)
+; RV64I-NEXT: srli a3, a1, 16
+; RV64I-NEXT: sb a3, 2(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 1(a2)
+; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: sb a1, 15(a2)
+; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: sb a1, 14(a2)
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: sb a1, 13(a2)
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: sb a1, 12(a2)
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: sb a1, 11(a2)
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: sb a1, 10(a2)
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: sb a0, 9(a2)
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: shl_16bytes:
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -296,17 +296,16 @@
 ; RV32I-NEXT: sll a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a3, a1, 16
 ; RV32I-NEXT: sb a3, 6(a2)
 ; RV32I-NEXT: srli a3, a1, 24
 ; RV32I-NEXT: sb a3, 7(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 5(a2)
+; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a1, a0, 16
 ; RV32I-NEXT: sb a1, 2(a2)
 ; RV32I-NEXT: srli a1, a0, 24
@@ -430,23 +429,22 @@
 ; RV32I-NEXT: srl a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: srli a3, a0, 16
-; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
-; RV32I-NEXT: srli a0, a1, 16
-; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a1, 24
-; RV32I-NEXT: sb a0, 3(a2)
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: sb a3, 3(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: sb a1, 6(a2)
+; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: sb a1, 7(a2)
+; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: sb a0, 5(a2)
 ; RV32I-NEXT: ret
 %src = load i64, ptr %src.ptr, align 1
 %bitOff = load i64, ptr %bitOff.ptr, align 1
@@ -670,11 +668,9 @@
 ; RV64I-NEXT: sll a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 8(a2)
-; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a3, a1, 56
 ; RV64I-NEXT: sb a3, 15(a2)
 ; RV64I-NEXT: srli a3, a1, 48
@@ -689,6 +685,7 @@
 ; RV64I-NEXT: sb a3, 10(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 9(a2)
+; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a1, a0, 56
 ; RV64I-NEXT: sb a1, 7(a2)
 ; RV64I-NEXT: srli a1, a0, 48
@@ -954,39 +951,38 @@
 ; RV64I-NEXT: srl a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB7_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
-; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a0, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a0, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a0, 32
-; RV64I-NEXT: sb a3, 12(a2)
-; RV64I-NEXT: srli a3, a0, 24
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a0, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
-; RV64I-NEXT: srli a0, a1, 56
-; RV64I-NEXT: sb a0, 7(a2)
-; RV64I-NEXT: srli a0, a1, 48
-; RV64I-NEXT: sb a0, 6(a2)
-; RV64I-NEXT: srli a0, a1, 40
-; RV64I-NEXT: sb a0, 5(a2)
-; RV64I-NEXT: srli a0, a1, 32
-; RV64I-NEXT: sb a0, 4(a2)
-; RV64I-NEXT: srli a0, a1, 24
-; RV64I-NEXT: sb a0, 3(a2)
-; RV64I-NEXT: srli a0, a1, 16
-; RV64I-NEXT: sb a0, 2(a2)
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 48
+; RV64I-NEXT: sb a3, 6(a2)
+; RV64I-NEXT: srli a3, a1, 40
+; RV64I-NEXT: sb a3, 5(a2)
+; RV64I-NEXT: srli a3, a1, 32
+; RV64I-NEXT: sb a3, 4(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: sb a3, 3(a2)
+; RV64I-NEXT: srli a3, a1, 16
+; RV64I-NEXT: sb a3, 2(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 1(a2)
+; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: sb a1, 15(a2)
+; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: sb a1, 14(a2)
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: sb a1, 13(a2)
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: sb a1, 12(a2)
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: sb a1, 11(a2)
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: sb a1, 10(a2)
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: sb a0, 9(a2)
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: shl_16bytes:
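Editor's illustration (not part of the patch): the identity behind the new fold can be sanity-checked outside of LLVM with a minimal standalone C++ sketch. Everything below is hypothetical and assumes XLEN=32; for negative x, `x < 0` is 1, so negating it gives all-ones, which is exactly what an arithmetic shift right by 31 produces by replicating the sign bit.

#include <cassert>
#include <cstdint>

int main() {
  const int32_t Vals[] = {INT32_MIN, -1234, -1, 0, 1, 1234, INT32_MAX};
  for (int32_t X : Vals) {
    // (sub 0, (setcc x, 0, setlt)): 0 - (x < 0 ? 1 : 0) is 0 or -1 (all-ones).
    int32_t Sub = 0 - static_cast<int32_t>(X < 0);
    // (sra x, xlen - 1): the arithmetic shift replicates the sign bit across
    // the word, giving 0 for non-negative x and -1 for negative x. (A right
    // shift of a negative value is arithmetic on mainstream C++
    // implementations and is guaranteed arithmetic since C++20.)
    int32_t Sra = X >> 31;
    assert(Sub == Sra);
  }
  return 0;
}

This is why replacing the slti+neg pair with a single srai in the tests above is behavior-preserving while saving an instruction; in bittest.ll, where the boolean itself (not its negation) feeds the and, the sign bit is extracted as 0/1 with srli instead.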