diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8971,6 +8971,29 @@
     return V;
   if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
     return V;
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+
+  // fold (add (setcc x, 0, setlt), -1) -> (sra x, xlen - 1)
+  // fold (add (setcc 0, x, setgt), -1) -> (sra x, xlen - 1)
+  if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SETCC && N0.hasOneUse()) {
+    ISD::CondCode CCVal = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    if ((CCVal == ISD::SETGT && isNullConstant(N00)) ||
+        (CCVal == ISD::SETLT && isNullConstant(N01))) {
+
+      if (CCVal == ISD::SETGT && isNullConstant(N00))
+        std::swap(N00, N01);
+
+      unsigned ShAmt = N0.getValueSizeInBits() - 1;
+      return DAG.getNode(ISD::SRA, DL, VT, N00, DAG.getConstant(ShAmt, DL, VT));
+    }
+  }
+
   // fold (add (select lhs, rhs, cc, 0, y), x) ->
   //      (select lhs, rhs, cc, x, (add x, y))
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
@@ -9022,10 +9045,29 @@
   if (SDValue V = combineSubOfBoolean(N, DAG))
     return V;
 
-  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
-  //      (select lhs, rhs, cc, x, (sub x, y))
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
+  // fold (sub 0, (setcc 0, x, setgt)) -> (sra x, xlen - 1)
+  if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse()) {
+    ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+    SDValue N10 = N1.getOperand(0);
+    SDValue N11 = N1.getOperand(1);
+    if ((CCVal == ISD::SETGT && isNullConstant(N1.getOperand(0))) ||
+        (CCVal == ISD::SETLT && isNullConstant(N1.getOperand(1)))) {
+
+      if (CCVal == ISD::SETGT && isNullConstant(N1.getOperand(0)))
+        std::swap(N10, N11);
+
+      unsigned ShAmt = N0.getValueSizeInBits() - 1;
+      return DAG.getNode(ISD::SRA, DL, VT, N10, DAG.getConstant(ShAmt, DL, VT));
+    }
+  }
+
+  // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
+  //      (select lhs, rhs, cc, x, (sub x, y))
   return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
 }
diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll
--- a/llvm/test/CodeGen/RISCV/alu64.ll
+++ b/llvm/test/CodeGen/RISCV/alu64.ll
@@ -219,8 +219,7 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB11_3:
-; RV32I-NEXT: slti a0, a4, 0
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: srai a0, a4, 31
 ; RV32I-NEXT: and a0, a0, a3
 ; RV32I-NEXT: ret
 %1 = shl i64 %a, %b
@@ -307,8 +306,7 @@
 ; RV32I-NEXT: sll a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: .LBB15_3:
-; RV32I-NEXT: slti a1, a4, 0
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: srai a1, a4, 31
 ; RV32I-NEXT: and a1, a1, a3
 ; RV32I-NEXT: ret
 %1 = lshr i64 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll
--- a/llvm/test/CodeGen/RISCV/bittest.ll
+++ b/llvm/test/CodeGen/RISCV/bittest.ll
@@ -367,7 +367,7 @@
 ; RV32-NEXT: addi a1, a1, 722
 ; RV32-NEXT: srl a1, a1, a0
 ; RV32-NEXT: addi a0, a0, -32
-; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: srli a0, a0, 31
 ; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 ;
@@ -407,7 +407,7 @@
 ; RV32-NEXT: addi a1, a1, 722
 ; RV32-NEXT: srl a1, a1, a0
 ; RV32-NEXT: addi a0, a0, -32
-; RV32-NEXT: slti a0, a0, 0
+; RV32-NEXT: 
srli a0, a0, 31 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -309,8 +309,7 @@ ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: srai s3, a0, 31 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixunsdfsi@plt @@ -350,8 +349,7 @@ ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: j .LBB6_3 ; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB6_3: # %start ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -626,8 +624,7 @@ ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: and s3, a0, s3 ; RV32I-NEXT: li a2, -1 ; RV32I-NEXT: mv a0, s1 @@ -801,8 +798,7 @@ ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s5, a0, -1 +; RV32I-NEXT: srai s5, a0, 31 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixunsdfdi@plt @@ -821,8 +817,7 @@ ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: and a0, a0, s3 ; RV32I-NEXT: or a1, s2, a0 ; RV32I-NEXT: mv a0, s4 @@ -845,8 +840,7 @@ ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gedf2@plt -; RV64I-NEXT: slti a0, a0, 0 -; RV64I-NEXT: addi s1, a0, -1 +; RV64I-NEXT: srai s1, a0, 63 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixunsdfdi@plt ; RV64I-NEXT: and s1, s1, a0 @@ -1500,9 +1494,8 @@ ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB28_3 ; RV32I-NEXT: .LBB28_2: -; RV32I-NEXT: slti a2, s0, 0 -; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: srai s0, s0, 31 +; RV32I-NEXT: and a0, s0, a0 ; RV32I-NEXT: .LBB28_3: # %start ; RV32I-NEXT: and a0, a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1539,8 +1532,7 @@ ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB28_3 ; RV64I-NEXT: .LBB28_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB28_3: # %start ; RV64I-NEXT: and a0, a0, a1 @@ -1803,9 +1795,8 @@ ; RV32I-NEXT: li a0, 255 ; RV32I-NEXT: j .LBB32_3 ; RV32I-NEXT: .LBB32_2: -; RV32I-NEXT: slti a1, s0, 0 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: srai s0, s0, 31 +; RV32I-NEXT: and a0, s0, a0 ; RV32I-NEXT: .LBB32_3: # %start ; RV32I-NEXT: andi a0, a0, 255 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -1839,8 +1830,7 @@ ; RV64I-NEXT: li a0, 255 ; RV64I-NEXT: j .LBB32_3 ; RV64I-NEXT: .LBB32_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB32_3: # %start ; RV64I-NEXT: andi a0, a0, 255 @@ -1898,8 +1888,7 @@ ; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __gedf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s3, a0, -1 +; RV32I-NEXT: srai s3, a0, 31 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __fixunsdfsi@plt @@ -1939,8 +1928,7 @@ ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: j .LBB33_3 
; RV64I-NEXT: .LBB33_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB33_3: # %start ; RV64I-NEXT: slli a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -228,8 +228,7 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: srai s1, a0, 31 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi@plt ; RV32I-NEXT: and s1, s1, a0 @@ -270,8 +269,7 @@ ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: j .LBB4_3 ; RV64I-NEXT: .LBB4_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB4_3: # %start ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -596,8 +594,7 @@ ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: and s1, a0, s1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 @@ -750,8 +747,7 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: srai s2, a0, 31 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi@plt ; RV32I-NEXT: mv s1, a1 @@ -767,8 +763,7 @@ ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: and s1, a0, s1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s2 @@ -794,8 +789,7 @@ ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: slti a0, a0, 0 -; RV64I-NEXT: addi s1, a0, -1 +; RV64I-NEXT: srai s1, a0, 63 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixunssfdi@plt ; RV64I-NEXT: and s1, s1, a0 @@ -1341,8 +1335,7 @@ ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB26_3 ; RV32I-NEXT: .LBB26_2: -; RV32I-NEXT: slti a0, s0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, s0, 31 ; RV32I-NEXT: and a0, a0, s1 ; RV32I-NEXT: .LBB26_3: # %start ; RV32I-NEXT: and a0, a0, a1 @@ -1378,8 +1371,7 @@ ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB26_3 ; RV64I-NEXT: .LBB26_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB26_3: # %start ; RV64I-NEXT: and a0, a0, a1 @@ -1618,8 +1610,7 @@ ; RV32I-NEXT: li a0, 255 ; RV32I-NEXT: j .LBB30_3 ; RV32I-NEXT: .LBB30_2: -; RV32I-NEXT: slti a0, s0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, s0, 31 ; RV32I-NEXT: and a0, a0, s1 ; RV32I-NEXT: .LBB30_3: # %start ; RV32I-NEXT: andi a0, a0, 255 @@ -1652,8 +1643,7 @@ ; RV64I-NEXT: li a0, 255 ; RV64I-NEXT: j .LBB30_3 ; RV64I-NEXT: .LBB30_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB30_3: # %start ; RV64I-NEXT: andi a0, a0, 255 @@ -1699,8 +1689,7 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s1, a0, -1 +; RV32I-NEXT: srai s1, a0, 31 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi@plt ; RV32I-NEXT: and s1, s1, a0 @@ -1741,8 +1730,7 @@ ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: j .LBB31_3 ; 
RV64I-NEXT: .LBB31_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB31_3: # %start ; RV64I-NEXT: slli a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1969,8 +1969,7 @@ ; RV32IF-NEXT: .LBB27_3: # %entry ; RV32IF-NEXT: mv a0, a3 ; RV32IF-NEXT: .LBB27_4: # %entry -; RV32IF-NEXT: slti a3, a1, 0 -; RV32IF-NEXT: neg a3, a3 +; RV32IF-NEXT: srai a3, a1, 31 ; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: mv a3, a0 ; RV32IF-NEXT: bltz a1, .LBB27_11 @@ -2302,8 +2301,7 @@ ; RV32-NEXT: .LBB33_3: # %entry ; RV32-NEXT: mv a0, a3 ; RV32-NEXT: .LBB33_4: # %entry -; RV32-NEXT: slti a3, a1, 0 -; RV32-NEXT: neg a3, a3 +; RV32-NEXT: srai a3, a1, 31 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: mv a3, a0 ; RV32-NEXT: bltz a1, .LBB33_11 @@ -3024,8 +3022,7 @@ ; RV32IF-NEXT: .LBB45_13: # %entry ; RV32IF-NEXT: beq a6, a7, .LBB45_15 ; RV32IF-NEXT: .LBB45_14: # %entry -; RV32IF-NEXT: slti a0, a5, 0 -; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: srai a0, a5, 31 ; RV32IF-NEXT: and a0, a0, a2 ; RV32IF-NEXT: .LBB45_15: # %entry ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -3072,8 +3069,7 @@ ; RV64IF-NEXT: .LBB45_3: # %entry ; RV64IF-NEXT: mv a0, a3 ; RV64IF-NEXT: .LBB45_4: # %entry -; RV64IF-NEXT: slti a3, a1, 0 -; RV64IF-NEXT: neg a3, a3 +; RV64IF-NEXT: srai a3, a1, 63 ; RV64IF-NEXT: and a1, a3, a1 ; RV64IF-NEXT: slli a4, a2, 63 ; RV64IF-NEXT: mv a3, a0 @@ -3163,8 +3159,7 @@ ; RV32IFD-NEXT: .LBB45_13: # %entry ; RV32IFD-NEXT: beq a6, a7, .LBB45_15 ; RV32IFD-NEXT: .LBB45_14: # %entry -; RV32IFD-NEXT: slti a0, a5, 0 -; RV32IFD-NEXT: addi a0, a0, -1 +; RV32IFD-NEXT: srai a0, a5, 31 ; RV32IFD-NEXT: and a0, a0, a2 ; RV32IFD-NEXT: .LBB45_15: # %entry ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -3528,8 +3523,7 @@ ; RV32-NEXT: .LBB48_13: # %entry ; RV32-NEXT: beq a6, a7, .LBB48_15 ; RV32-NEXT: .LBB48_14: # %entry -; RV32-NEXT: slti a0, a5, 0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: srai a0, a5, 31 ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: .LBB48_15: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -3795,8 +3789,7 @@ ; RV32-NEXT: .LBB51_13: # %entry ; RV32-NEXT: beq a6, a7, .LBB51_15 ; RV32-NEXT: .LBB51_14: # %entry -; RV32-NEXT: slti a0, a5, 0 -; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: srai a0, a5, 31 ; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: .LBB51_15: # %entry ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -3845,8 +3838,7 @@ ; RV64-NEXT: .LBB51_3: # %entry ; RV64-NEXT: mv a0, a3 ; RV64-NEXT: .LBB51_4: # %entry -; RV64-NEXT: slti a3, a1, 0 -; RV64-NEXT: neg a3, a3 +; RV64-NEXT: srai a3, a1, 63 ; RV64-NEXT: and a1, a3, a1 ; RV64-NEXT: slli a4, a2, 63 ; RV64-NEXT: mv a3, a0 diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll @@ -5355,15 +5355,13 @@ ; CHECK-NOV-NEXT: .LBB45_6: # %entry ; CHECK-NOV-NEXT: mv s0, a4 ; CHECK-NOV-NEXT: .LBB45_7: # %entry -; CHECK-NOV-NEXT: slti a6, a1, 0 -; CHECK-NOV-NEXT: slti a3, s1, 0 -; CHECK-NOV-NEXT: neg a4, a3 +; CHECK-NOV-NEXT: srai a4, s1, 63 ; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: slli a3, a0, 63 ; CHECK-NOV-NEXT: mv a5, s0 ; CHECK-NOV-NEXT: bltz a4, .LBB45_20 ; CHECK-NOV-NEXT: # %bb.8: # %entry -; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: srai a6, 
a1, 63 ; CHECK-NOV-NEXT: bgeu a3, s0, .LBB45_21 ; CHECK-NOV-NEXT: .LBB45_9: # %entry ; CHECK-NOV-NEXT: and a1, a6, a1 @@ -5405,7 +5403,7 @@ ; CHECK-NOV-NEXT: j .LBB45_7 ; CHECK-NOV-NEXT: .LBB45_20: # %entry ; CHECK-NOV-NEXT: mv a5, a3 -; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: srai a6, a1, 63 ; CHECK-NOV-NEXT: bltu a3, s0, .LBB45_9 ; CHECK-NOV-NEXT: .LBB45_21: # %entry ; CHECK-NOV-NEXT: mv s0, a3 @@ -5468,15 +5466,13 @@ ; CHECK-V-NEXT: .LBB45_6: # %entry ; CHECK-V-NEXT: mv a0, a4 ; CHECK-V-NEXT: .LBB45_7: # %entry -; CHECK-V-NEXT: slti a3, s1, 0 -; CHECK-V-NEXT: neg a4, a3 +; CHECK-V-NEXT: srai a4, s1, 63 ; CHECK-V-NEXT: and a4, a4, s1 -; CHECK-V-NEXT: slti a6, a1, 0 ; CHECK-V-NEXT: slli a3, a2, 63 ; CHECK-V-NEXT: mv a5, s0 ; CHECK-V-NEXT: bltz a4, .LBB45_20 ; CHECK-V-NEXT: # %bb.8: # %entry -; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: srai a6, a1, 63 ; CHECK-V-NEXT: bgeu a3, s0, .LBB45_21 ; CHECK-V-NEXT: .LBB45_9: # %entry ; CHECK-V-NEXT: and a1, a6, a1 @@ -5527,7 +5523,7 @@ ; CHECK-V-NEXT: j .LBB45_7 ; CHECK-V-NEXT: .LBB45_20: # %entry ; CHECK-V-NEXT: mv a5, a3 -; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: srai a6, a1, 63 ; CHECK-V-NEXT: bltu a3, s0, .LBB45_9 ; CHECK-V-NEXT: .LBB45_21: # %entry ; CHECK-V-NEXT: mv s0, a3 @@ -5845,15 +5841,13 @@ ; CHECK-NOV-NEXT: .LBB48_6: # %entry ; CHECK-NOV-NEXT: mv s0, a4 ; CHECK-NOV-NEXT: .LBB48_7: # %entry -; CHECK-NOV-NEXT: slti a6, a1, 0 -; CHECK-NOV-NEXT: slti a3, s1, 0 -; CHECK-NOV-NEXT: neg a4, a3 +; CHECK-NOV-NEXT: srai a4, s1, 63 ; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: slli a3, a0, 63 ; CHECK-NOV-NEXT: mv a5, s0 ; CHECK-NOV-NEXT: bltz a4, .LBB48_20 ; CHECK-NOV-NEXT: # %bb.8: # %entry -; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: srai a6, a1, 63 ; CHECK-NOV-NEXT: bgeu a3, s0, .LBB48_21 ; CHECK-NOV-NEXT: .LBB48_9: # %entry ; CHECK-NOV-NEXT: and a1, a6, a1 @@ -5895,7 +5889,7 @@ ; CHECK-NOV-NEXT: j .LBB48_7 ; CHECK-NOV-NEXT: .LBB48_20: # %entry ; CHECK-NOV-NEXT: mv a5, a3 -; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: srai a6, a1, 63 ; CHECK-NOV-NEXT: bltu a3, s0, .LBB48_9 ; CHECK-NOV-NEXT: .LBB48_21: # %entry ; CHECK-NOV-NEXT: mv s0, a3 @@ -5958,15 +5952,13 @@ ; CHECK-V-NEXT: .LBB48_6: # %entry ; CHECK-V-NEXT: mv a0, a4 ; CHECK-V-NEXT: .LBB48_7: # %entry -; CHECK-V-NEXT: slti a3, s1, 0 -; CHECK-V-NEXT: neg a4, a3 +; CHECK-V-NEXT: srai a4, s1, 63 ; CHECK-V-NEXT: and a4, a4, s1 -; CHECK-V-NEXT: slti a6, a1, 0 ; CHECK-V-NEXT: slli a3, a2, 63 ; CHECK-V-NEXT: mv a5, s0 ; CHECK-V-NEXT: bltz a4, .LBB48_20 ; CHECK-V-NEXT: # %bb.8: # %entry -; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: srai a6, a1, 63 ; CHECK-V-NEXT: bgeu a3, s0, .LBB48_21 ; CHECK-V-NEXT: .LBB48_9: # %entry ; CHECK-V-NEXT: and a1, a6, a1 @@ -6017,7 +6009,7 @@ ; CHECK-V-NEXT: j .LBB48_7 ; CHECK-V-NEXT: .LBB48_20: # %entry ; CHECK-V-NEXT: mv a5, a3 -; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: srai a6, a1, 63 ; CHECK-V-NEXT: bltu a3, s0, .LBB48_9 ; CHECK-V-NEXT: .LBB48_21: # %entry ; CHECK-V-NEXT: mv s0, a3 @@ -6337,15 +6329,13 @@ ; CHECK-NOV-NEXT: .LBB51_6: # %entry ; CHECK-NOV-NEXT: mv s0, a4 ; CHECK-NOV-NEXT: .LBB51_7: # %entry -; CHECK-NOV-NEXT: slti a6, a1, 0 -; CHECK-NOV-NEXT: slti a3, s1, 0 -; CHECK-NOV-NEXT: neg a4, a3 +; CHECK-NOV-NEXT: srai a4, s1, 63 ; CHECK-NOV-NEXT: and a4, a4, s1 ; CHECK-NOV-NEXT: slli a3, a0, 63 ; CHECK-NOV-NEXT: mv a5, s0 ; CHECK-NOV-NEXT: bltz a4, .LBB51_20 ; CHECK-NOV-NEXT: # %bb.8: # %entry -; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: srai a6, a1, 63 ; CHECK-NOV-NEXT: bgeu a3, s0, .LBB51_21 ; CHECK-NOV-NEXT: .LBB51_9: # %entry ; 
CHECK-NOV-NEXT: and a1, a6, a1 @@ -6387,7 +6377,7 @@ ; CHECK-NOV-NEXT: j .LBB51_7 ; CHECK-NOV-NEXT: .LBB51_20: # %entry ; CHECK-NOV-NEXT: mv a5, a3 -; CHECK-NOV-NEXT: neg a6, a6 +; CHECK-NOV-NEXT: srai a6, a1, 63 ; CHECK-NOV-NEXT: bltu a3, s0, .LBB51_9 ; CHECK-NOV-NEXT: .LBB51_21: # %entry ; CHECK-NOV-NEXT: mv s0, a3 @@ -6443,15 +6433,13 @@ ; CHECK-V-NEXT: .LBB51_6: # %entry ; CHECK-V-NEXT: mv s0, a4 ; CHECK-V-NEXT: .LBB51_7: # %entry -; CHECK-V-NEXT: slti a6, a1, 0 -; CHECK-V-NEXT: slti a3, s1, 0 -; CHECK-V-NEXT: neg a4, a3 +; CHECK-V-NEXT: srai a4, s1, 63 ; CHECK-V-NEXT: and a4, a4, s1 ; CHECK-V-NEXT: slli a3, a2, 63 ; CHECK-V-NEXT: mv a5, s0 ; CHECK-V-NEXT: bltz a4, .LBB51_20 ; CHECK-V-NEXT: # %bb.8: # %entry -; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: srai a6, a1, 63 ; CHECK-V-NEXT: bgeu a3, s0, .LBB51_21 ; CHECK-V-NEXT: .LBB51_9: # %entry ; CHECK-V-NEXT: and a1, a6, a1 @@ -6500,7 +6488,7 @@ ; CHECK-V-NEXT: j .LBB51_7 ; CHECK-V-NEXT: .LBB51_20: # %entry ; CHECK-V-NEXT: mv a5, a3 -; CHECK-V-NEXT: neg a6, a6 +; CHECK-V-NEXT: srai a6, a1, 63 ; CHECK-V-NEXT: bltu a3, s0, .LBB51_9 ; CHECK-V-NEXT: .LBB51_21: # %entry ; CHECK-V-NEXT: mv s0, a3 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -398,9 +398,8 @@ ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: bgtz a0, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slti a0, s2, 0 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and s0, a0, s1 +; RV32I-NEXT: srai s0, s2, 31 +; RV32I-NEXT: and s0, s0, s1 ; RV32I-NEXT: .LBB3_2: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -436,9 +435,8 @@ ; RV64I-NEXT: call __gtsf2@plt ; RV64I-NEXT: bgtz a0, .LBB3_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: slti a0, s2, 0 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and s0, a0, s1 +; RV64I-NEXT: srai s0, s2, 63 +; RV64I-NEXT: and s0, s0, s1 ; RV64I-NEXT: .LBB3_2: # %start ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -849,29 +847,26 @@ ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 -; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: srai s1, a0, 31 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and s1, s1, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a0, a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -902,8 +897,7 @@ ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: j .LBB8_3 ; RV64I-NEXT: .LBB8_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; 
RV64I-NEXT: .LBB8_3: # %start ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -1123,50 +1117,49 @@ ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixsfdi@plt ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a1 ; RV32I-NEXT: lui s4, 524288 -; RV32I-NEXT: bgez s3, .LBB10_2 +; RV32I-NEXT: bgez s2, .LBB10_2 ; RV32I-NEXT: # %bb.1: # %start -; RV32I-NEXT: lui s2, 524288 +; RV32I-NEXT: lui s3, 524288 ; RV32I-NEXT: .LBB10_2: # %start -; RV32I-NEXT: lui s3, 389120 -; RV32I-NEXT: addi s3, s3, -1 +; RV32I-NEXT: lui s2, 389120 +; RV32I-NEXT: addi s2, s2, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: blez a0, .LBB10_4 ; RV32I-NEXT: # %bb.3: # %start -; RV32I-NEXT: addi s2, s4, -1 +; RV32I-NEXT: addi s3, s4, -1 ; RV32I-NEXT: .LBB10_4: # %start ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and s2, a0, s2 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s3 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s3, a0 +; RV32I-NEXT: and s3, a0, s3 ; RV32I-NEXT: lui a1, 913408 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: or s1, s3, a0 +; RV32I-NEXT: srai a0, a0, 31 +; RV32I-NEXT: and s1, a0, s1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or s1, a0, s1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s0 ; RV32I-NEXT: call __unordsf2@plt ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: addi a0, a0, -1 ; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1462,46 +1455,42 @@ ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui s1, 391168 -; RV32I-NEXT: addi s1, s1, -1 -; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s3, a0 -; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s4, a0, -1 +; RV32I-NEXT: srai s2, a0, 31 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfdi@plt -; RV32I-NEXT: mv s2, a1 -; RV32I-NEXT: and a0, s4, a0 -; RV32I-NEXT: or s3, s3, a0 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: and s3, s2, a0 +; RV32I-NEXT: lui s2, 391168 +; RV32I-NEXT: addi s2, s2, -1 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __gtsf2@plt ; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or s3, a0, s3 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a1, a0, s2 -; RV32I-NEXT: or a1, s1, a1 +; RV32I-NEXT: srai a0, a0, 31 +; RV32I-NEXT: and s1, a0, s1 +; RV32I-NEXT: mv a0, s0 +; 
RV32I-NEXT: mv a1, s2 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: or a1, a1, s1 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret ; @@ -1511,29 +1500,26 @@ ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: slli a0, a0, 48 ; RV64I-NEXT: srli a0, a0, 48 ; RV64I-NEXT: call __extendhfsf2@plt ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lui a1, 391168 -; RV64I-NEXT: addiw a1, a1, -1 -; RV64I-NEXT: call __gtsf2@plt -; RV64I-NEXT: sgtz a0, a0 -; RV64I-NEXT: neg s1, a0 -; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: call __gesf2@plt -; RV64I-NEXT: slti a0, a0, 0 -; RV64I-NEXT: addi s2, a0, -1 +; RV64I-NEXT: srai s1, a0, 63 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __fixunssfdi@plt -; RV64I-NEXT: and a0, s2, a0 -; RV64I-NEXT: or a0, s1, a0 +; RV64I-NEXT: and s1, s1, a0 +; RV64I-NEXT: lui a1, 391168 +; RV64I-NEXT: addiw a1, a1, -1 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: call __gtsf2@plt +; RV64I-NEXT: sgtz a0, a0 +; RV64I-NEXT: neg a0, a0 +; RV64I-NEXT: or a0, a0, s1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret ; @@ -3092,8 +3078,7 @@ ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: j .LBB34_3 ; RV32I-NEXT: .LBB34_2: -; RV32I-NEXT: slti a0, s1, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, s1, 31 ; RV32I-NEXT: and a0, a0, s0 ; RV32I-NEXT: .LBB34_3: # %start ; RV32I-NEXT: and a0, a0, s3 @@ -3133,8 +3118,7 @@ ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: j .LBB34_3 ; RV64I-NEXT: .LBB34_2: -; RV64I-NEXT: slti a0, s1, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s1, 63 ; RV64I-NEXT: and a0, a0, s0 ; RV64I-NEXT: .LBB34_3: # %start ; RV64I-NEXT: and a0, a0, s3 @@ -3550,8 +3534,7 @@ ; RV32I-NEXT: li a0, 255 ; RV32I-NEXT: j .LBB38_3 ; RV32I-NEXT: .LBB38_2: -; RV32I-NEXT: slti a0, s0, 0 -; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: srai a0, s0, 31 ; RV32I-NEXT: and a0, a0, s1 ; RV32I-NEXT: .LBB38_3: # %start ; RV32I-NEXT: andi a0, a0, 255 @@ -3587,8 +3570,7 @@ ; RV64I-NEXT: li a0, 255 ; RV64I-NEXT: j .LBB38_3 ; RV64I-NEXT: .LBB38_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB38_3: # %start ; RV64I-NEXT: andi a0, a0, 255 @@ -3673,29 +3655,26 @@ ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 ; RV32I-NEXT: call __extendhfsf2@plt ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lui a1, 325632 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: call __gtsf2@plt -; RV32I-NEXT: sgtz a0, a0 -; RV32I-NEXT: neg s1, a0 -; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: call __gesf2@plt -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: addi s2, a0, -1 +; RV32I-NEXT: srai s1, 
a0, 31 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __fixunssfsi@plt -; RV32I-NEXT: and a0, s2, a0 -; RV32I-NEXT: or a0, s1, a0 +; RV32I-NEXT: and s1, s1, a0 +; RV32I-NEXT: lui a1, 325632 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: call __gtsf2@plt +; RV32I-NEXT: sgtz a0, a0 +; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: or a0, a0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; @@ -3726,8 +3705,7 @@ ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: j .LBB39_3 ; RV64I-NEXT: .LBB39_2: -; RV64I-NEXT: slti a0, s0, 0 -; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: srai a0, s0, 63 ; RV64I-NEXT: and a0, a0, s1 ; RV64I-NEXT: .LBB39_3: # %start ; RV64I-NEXT: slli a0, a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -119,11 +119,9 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64: ; RV32I: # %bb.0: +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: sll a4, a0, a2 -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: slti a5, a3, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: bltz a3, .LBB2_2 +; RV32I-NEXT: bltz a5, .LBB2_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: j .LBB2_3 @@ -134,14 +132,15 @@ ; RV32I-NEXT: srl a6, a7, a6 ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB2_3: +; RV32I-NEXT: srai a5, a5, 31 ; RV32I-NEXT: and a4, a5, a4 ; RV32I-NEXT: neg a7, a2 ; RV32I-NEXT: li a5, 32 -; RV32I-NEXT: sub a6, a5, a2 -; RV32I-NEXT: srl a5, a1, a7 -; RV32I-NEXT: bltz a6, .LBB2_5 +; RV32I-NEXT: sub a5, a5, a2 +; RV32I-NEXT: srl a6, a1, a7 +; RV32I-NEXT: bltz a5, .LBB2_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a0, a5 +; RV32I-NEXT: mv a0, a6 ; RV32I-NEXT: j .LBB2_6 ; RV32I-NEXT: .LBB2_5: ; RV32I-NEXT: srl a0, a0, a7 @@ -152,9 +151,8 @@ ; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: .LBB2_6: -; RV32I-NEXT: slti a1, a6, 0 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: and a1, a1, a5 +; RV32I-NEXT: srai a5, a5, 31 +; RV32I-NEXT: and a1, a5, a6 ; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: or a0, a4, a0 ; RV32I-NEXT: ret @@ -169,11 +167,9 @@ ; ; RV32ZBB-LABEL: rotl_64: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: sll a4, a0, a2 -; RV32ZBB-NEXT: addi a3, a2, -32 -; RV32ZBB-NEXT: slti a5, a3, 0 -; RV32ZBB-NEXT: neg a5, a5 -; RV32ZBB-NEXT: bltz a3, .LBB2_2 +; RV32ZBB-NEXT: bltz a5, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 ; RV32ZBB-NEXT: j .LBB2_3 @@ -184,14 +180,15 @@ ; RV32ZBB-NEXT: srl a6, a7, a6 ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB2_3: +; RV32ZBB-NEXT: srai a5, a5, 31 ; RV32ZBB-NEXT: and a4, a5, a4 ; RV32ZBB-NEXT: neg a7, a2 ; RV32ZBB-NEXT: li a5, 32 -; RV32ZBB-NEXT: sub a6, a5, a2 -; RV32ZBB-NEXT: srl a5, a1, a7 -; RV32ZBB-NEXT: bltz a6, .LBB2_5 +; RV32ZBB-NEXT: sub a5, a5, a2 +; RV32ZBB-NEXT: srl a6, a1, a7 +; RV32ZBB-NEXT: bltz a5, .LBB2_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: mv a0, a5 +; RV32ZBB-NEXT: mv a0, a6 ; RV32ZBB-NEXT: j .LBB2_6 ; RV32ZBB-NEXT: .LBB2_5: ; RV32ZBB-NEXT: srl a0, a0, a7 @@ -202,9 +199,8 @@ ; RV32ZBB-NEXT: sll a1, a1, a2 ; RV32ZBB-NEXT: or a0, a0, a1 ; RV32ZBB-NEXT: .LBB2_6: -; RV32ZBB-NEXT: slti a1, a6, 0 -; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: and a1, a1, a5 +; RV32ZBB-NEXT: srai a5, a5, 31 +; RV32ZBB-NEXT: and a1, a5, a6 ; 
RV32ZBB-NEXT: or a1, a3, a1 ; RV32ZBB-NEXT: or a0, a4, a0 ; RV32ZBB-NEXT: ret @@ -216,11 +212,9 @@ ; ; RV32XTHEADBB-LABEL: rotl_64: ; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: sll a4, a0, a2 -; RV32XTHEADBB-NEXT: addi a3, a2, -32 -; RV32XTHEADBB-NEXT: slti a5, a3, 0 -; RV32XTHEADBB-NEXT: neg a5, a5 -; RV32XTHEADBB-NEXT: bltz a3, .LBB2_2 +; RV32XTHEADBB-NEXT: bltz a5, .LBB2_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 ; RV32XTHEADBB-NEXT: j .LBB2_3 @@ -231,14 +225,15 @@ ; RV32XTHEADBB-NEXT: srl a6, a7, a6 ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB2_3: +; RV32XTHEADBB-NEXT: srai a5, a5, 31 ; RV32XTHEADBB-NEXT: and a4, a5, a4 ; RV32XTHEADBB-NEXT: neg a7, a2 ; RV32XTHEADBB-NEXT: li a5, 32 -; RV32XTHEADBB-NEXT: sub a6, a5, a2 -; RV32XTHEADBB-NEXT: srl a5, a1, a7 -; RV32XTHEADBB-NEXT: bltz a6, .LBB2_5 +; RV32XTHEADBB-NEXT: sub a5, a5, a2 +; RV32XTHEADBB-NEXT: srl a6, a1, a7 +; RV32XTHEADBB-NEXT: bltz a5, .LBB2_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: mv a0, a5 +; RV32XTHEADBB-NEXT: mv a0, a6 ; RV32XTHEADBB-NEXT: j .LBB2_6 ; RV32XTHEADBB-NEXT: .LBB2_5: ; RV32XTHEADBB-NEXT: srl a0, a0, a7 @@ -249,9 +244,8 @@ ; RV32XTHEADBB-NEXT: sll a1, a1, a2 ; RV32XTHEADBB-NEXT: or a0, a0, a1 ; RV32XTHEADBB-NEXT: .LBB2_6: -; RV32XTHEADBB-NEXT: slti a1, a6, 0 -; RV32XTHEADBB-NEXT: neg a1, a1 -; RV32XTHEADBB-NEXT: and a1, a1, a5 +; RV32XTHEADBB-NEXT: srai a5, a5, 31 +; RV32XTHEADBB-NEXT: and a1, a5, a6 ; RV32XTHEADBB-NEXT: or a1, a3, a1 ; RV32XTHEADBB-NEXT: or a0, a4, a0 ; RV32XTHEADBB-NEXT: ret @@ -273,11 +267,9 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64: ; RV32I: # %bb.0: +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: srl a4, a1, a2 -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: slti a5, a3, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: bltz a3, .LBB3_2 +; RV32I-NEXT: bltz a5, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: j .LBB3_3 @@ -288,14 +280,15 @@ ; RV32I-NEXT: sll a6, a7, a6 ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB3_3: +; RV32I-NEXT: srai a5, a5, 31 ; RV32I-NEXT: and a4, a5, a4 ; RV32I-NEXT: neg a7, a2 ; RV32I-NEXT: li a5, 32 -; RV32I-NEXT: sub a6, a5, a2 -; RV32I-NEXT: sll a5, a0, a7 -; RV32I-NEXT: bltz a6, .LBB3_5 +; RV32I-NEXT: sub a5, a5, a2 +; RV32I-NEXT: sll a6, a0, a7 +; RV32I-NEXT: bltz a5, .LBB3_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: mv a1, a6 ; RV32I-NEXT: j .LBB3_6 ; RV32I-NEXT: .LBB3_5: ; RV32I-NEXT: sll a1, a1, a7 @@ -306,9 +299,8 @@ ; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: or a1, a1, a0 ; RV32I-NEXT: .LBB3_6: -; RV32I-NEXT: slti a0, a6, 0 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a0, a0, a5 +; RV32I-NEXT: srai a5, a5, 31 +; RV32I-NEXT: and a0, a5, a6 ; RV32I-NEXT: or a0, a3, a0 ; RV32I-NEXT: or a1, a4, a1 ; RV32I-NEXT: ret @@ -323,11 +315,9 @@ ; ; RV32ZBB-LABEL: rotr_64: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: srl a4, a1, a2 -; RV32ZBB-NEXT: addi a3, a2, -32 -; RV32ZBB-NEXT: slti a5, a3, 0 -; RV32ZBB-NEXT: neg a5, a5 -; RV32ZBB-NEXT: bltz a3, .LBB3_2 +; RV32ZBB-NEXT: bltz a5, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 ; RV32ZBB-NEXT: j .LBB3_3 @@ -338,14 +328,15 @@ ; RV32ZBB-NEXT: sll a6, a7, a6 ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB3_3: +; RV32ZBB-NEXT: srai a5, a5, 31 ; RV32ZBB-NEXT: and a4, a5, a4 ; RV32ZBB-NEXT: neg a7, a2 ; RV32ZBB-NEXT: li a5, 32 -; RV32ZBB-NEXT: sub a6, a5, a2 -; RV32ZBB-NEXT: sll a5, a0, a7 -; RV32ZBB-NEXT: bltz a6, .LBB3_5 +; 
RV32ZBB-NEXT: sub a5, a5, a2 +; RV32ZBB-NEXT: sll a6, a0, a7 +; RV32ZBB-NEXT: bltz a5, .LBB3_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: mv a1, a5 +; RV32ZBB-NEXT: mv a1, a6 ; RV32ZBB-NEXT: j .LBB3_6 ; RV32ZBB-NEXT: .LBB3_5: ; RV32ZBB-NEXT: sll a1, a1, a7 @@ -356,9 +347,8 @@ ; RV32ZBB-NEXT: srl a0, a0, a2 ; RV32ZBB-NEXT: or a1, a1, a0 ; RV32ZBB-NEXT: .LBB3_6: -; RV32ZBB-NEXT: slti a0, a6, 0 -; RV32ZBB-NEXT: neg a0, a0 -; RV32ZBB-NEXT: and a0, a0, a5 +; RV32ZBB-NEXT: srai a5, a5, 31 +; RV32ZBB-NEXT: and a0, a5, a6 ; RV32ZBB-NEXT: or a0, a3, a0 ; RV32ZBB-NEXT: or a1, a4, a1 ; RV32ZBB-NEXT: ret @@ -370,11 +360,9 @@ ; ; RV32XTHEADBB-LABEL: rotr_64: ; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: srl a4, a1, a2 -; RV32XTHEADBB-NEXT: addi a3, a2, -32 -; RV32XTHEADBB-NEXT: slti a5, a3, 0 -; RV32XTHEADBB-NEXT: neg a5, a5 -; RV32XTHEADBB-NEXT: bltz a3, .LBB3_2 +; RV32XTHEADBB-NEXT: bltz a5, .LBB3_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 ; RV32XTHEADBB-NEXT: j .LBB3_3 @@ -385,14 +373,15 @@ ; RV32XTHEADBB-NEXT: sll a6, a7, a6 ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB3_3: +; RV32XTHEADBB-NEXT: srai a5, a5, 31 ; RV32XTHEADBB-NEXT: and a4, a5, a4 ; RV32XTHEADBB-NEXT: neg a7, a2 ; RV32XTHEADBB-NEXT: li a5, 32 -; RV32XTHEADBB-NEXT: sub a6, a5, a2 -; RV32XTHEADBB-NEXT: sll a5, a0, a7 -; RV32XTHEADBB-NEXT: bltz a6, .LBB3_5 +; RV32XTHEADBB-NEXT: sub a5, a5, a2 +; RV32XTHEADBB-NEXT: sll a6, a0, a7 +; RV32XTHEADBB-NEXT: bltz a5, .LBB3_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: mv a1, a5 +; RV32XTHEADBB-NEXT: mv a1, a6 ; RV32XTHEADBB-NEXT: j .LBB3_6 ; RV32XTHEADBB-NEXT: .LBB3_5: ; RV32XTHEADBB-NEXT: sll a1, a1, a7 @@ -403,9 +392,8 @@ ; RV32XTHEADBB-NEXT: srl a0, a0, a2 ; RV32XTHEADBB-NEXT: or a1, a1, a0 ; RV32XTHEADBB-NEXT: .LBB3_6: -; RV32XTHEADBB-NEXT: slti a0, a6, 0 -; RV32XTHEADBB-NEXT: neg a0, a0 -; RV32XTHEADBB-NEXT: and a0, a0, a5 +; RV32XTHEADBB-NEXT: srai a5, a5, 31 +; RV32XTHEADBB-NEXT: and a0, a5, a6 ; RV32XTHEADBB-NEXT: or a0, a3, a0 ; RV32XTHEADBB-NEXT: or a1, a4, a1 ; RV32XTHEADBB-NEXT: ret @@ -748,29 +736,27 @@ ; RV32I-NEXT: srl a6, a7, a6 ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: slti a5, a5, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: and a4, a5, a4 -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: srl a2, a1, a6 -; RV32I-NEXT: andi a5, a6, 63 -; RV32I-NEXT: addi a7, a5, -32 -; RV32I-NEXT: slti t0, a7, 0 -; RV32I-NEXT: neg t0, t0 -; RV32I-NEXT: and a2, t0, a2 -; RV32I-NEXT: bltz a7, .LBB10_5 +; RV32I-NEXT: srai t0, a5, 31 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: andi a7, a5, 63 +; RV32I-NEXT: addi a6, a7, -32 +; RV32I-NEXT: and a2, t0, a4 +; RV32I-NEXT: bltz a6, .LBB10_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a0, a1, a5 +; RV32I-NEXT: srl a0, a1, a7 ; RV32I-NEXT: j .LBB10_6 ; RV32I-NEXT: .LBB10_5: -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: not a5, a5 -; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: sll a1, a1, a5 -; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srl a0, a0, a5 +; RV32I-NEXT: not a4, a7 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: sll a4, a7, a4 +; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: .LBB10_6: -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a3, a2 +; RV32I-NEXT: srl a1, a1, a5 +; RV32I-NEXT: srai a4, a6, 31 +; RV32I-NEXT: and a1, a4, a1 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask: @@ -796,29 +782,27 @@ ; RV32ZBB-NEXT: srl a6, a7, a6 ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB10_3: -; RV32ZBB-NEXT: slti a5, 
a5, 0 -; RV32ZBB-NEXT: neg a5, a5 -; RV32ZBB-NEXT: and a4, a5, a4 -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: srl a2, a1, a6 -; RV32ZBB-NEXT: andi a5, a6, 63 -; RV32ZBB-NEXT: addi a7, a5, -32 -; RV32ZBB-NEXT: slti t0, a7, 0 -; RV32ZBB-NEXT: neg t0, t0 -; RV32ZBB-NEXT: and a2, t0, a2 -; RV32ZBB-NEXT: bltz a7, .LBB10_5 +; RV32ZBB-NEXT: srai t0, a5, 31 +; RV32ZBB-NEXT: neg a5, a2 +; RV32ZBB-NEXT: andi a7, a5, 63 +; RV32ZBB-NEXT: addi a6, a7, -32 +; RV32ZBB-NEXT: and a2, t0, a4 +; RV32ZBB-NEXT: bltz a6, .LBB10_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a0, a1, a5 +; RV32ZBB-NEXT: srl a0, a1, a7 ; RV32ZBB-NEXT: j .LBB10_6 ; RV32ZBB-NEXT: .LBB10_5: -; RV32ZBB-NEXT: srl a0, a0, a6 -; RV32ZBB-NEXT: not a5, a5 -; RV32ZBB-NEXT: slli a1, a1, 1 -; RV32ZBB-NEXT: sll a1, a1, a5 -; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srl a0, a0, a5 +; RV32ZBB-NEXT: not a4, a7 +; RV32ZBB-NEXT: slli a7, a1, 1 +; RV32ZBB-NEXT: sll a4, a7, a4 +; RV32ZBB-NEXT: or a0, a0, a4 ; RV32ZBB-NEXT: .LBB10_6: -; RV32ZBB-NEXT: or a0, a4, a0 -; RV32ZBB-NEXT: or a1, a3, a2 +; RV32ZBB-NEXT: srl a1, a1, a5 +; RV32ZBB-NEXT: srai a4, a6, 31 +; RV32ZBB-NEXT: and a1, a4, a1 +; RV32ZBB-NEXT: or a1, a3, a1 +; RV32ZBB-NEXT: or a0, a2, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask: @@ -841,29 +825,27 @@ ; RV32XTHEADBB-NEXT: srl a6, a7, a6 ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB10_3: -; RV32XTHEADBB-NEXT: slti a5, a5, 0 -; RV32XTHEADBB-NEXT: neg a5, a5 -; RV32XTHEADBB-NEXT: and a4, a5, a4 -; RV32XTHEADBB-NEXT: neg a6, a2 -; RV32XTHEADBB-NEXT: srl a2, a1, a6 -; RV32XTHEADBB-NEXT: andi a5, a6, 63 -; RV32XTHEADBB-NEXT: addi a7, a5, -32 -; RV32XTHEADBB-NEXT: slti t0, a7, 0 -; RV32XTHEADBB-NEXT: neg t0, t0 -; RV32XTHEADBB-NEXT: and a2, t0, a2 -; RV32XTHEADBB-NEXT: bltz a7, .LBB10_5 +; RV32XTHEADBB-NEXT: srai t0, a5, 31 +; RV32XTHEADBB-NEXT: neg a5, a2 +; RV32XTHEADBB-NEXT: andi a7, a5, 63 +; RV32XTHEADBB-NEXT: addi a6, a7, -32 +; RV32XTHEADBB-NEXT: and a2, t0, a4 +; RV32XTHEADBB-NEXT: bltz a6, .LBB10_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: srl a0, a1, a5 +; RV32XTHEADBB-NEXT: srl a0, a1, a7 ; RV32XTHEADBB-NEXT: j .LBB10_6 ; RV32XTHEADBB-NEXT: .LBB10_5: -; RV32XTHEADBB-NEXT: srl a0, a0, a6 -; RV32XTHEADBB-NEXT: not a5, a5 -; RV32XTHEADBB-NEXT: slli a1, a1, 1 -; RV32XTHEADBB-NEXT: sll a1, a1, a5 -; RV32XTHEADBB-NEXT: or a0, a0, a1 +; RV32XTHEADBB-NEXT: srl a0, a0, a5 +; RV32XTHEADBB-NEXT: not a4, a7 +; RV32XTHEADBB-NEXT: slli a7, a1, 1 +; RV32XTHEADBB-NEXT: sll a4, a7, a4 +; RV32XTHEADBB-NEXT: or a0, a0, a4 ; RV32XTHEADBB-NEXT: .LBB10_6: -; RV32XTHEADBB-NEXT: or a0, a4, a0 -; RV32XTHEADBB-NEXT: or a1, a3, a2 +; RV32XTHEADBB-NEXT: srl a1, a1, a5 +; RV32XTHEADBB-NEXT: srai a4, a6, 31 +; RV32XTHEADBB-NEXT: and a1, a4, a1 +; RV32XTHEADBB-NEXT: or a1, a3, a1 +; RV32XTHEADBB-NEXT: or a0, a2, a0 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotl_64_mask: @@ -897,30 +879,28 @@ ; RV32I-NEXT: srl a3, a6, a3 ; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: sll a5, a0, a2 -; RV32I-NEXT: slti a4, a4, 0 -; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: and a4, a4, a5 -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: srl a2, a1, a6 -; RV32I-NEXT: andi a5, a6, 63 -; RV32I-NEXT: addi a7, a5, -32 -; RV32I-NEXT: slti t0, a7, 0 -; RV32I-NEXT: neg t0, t0 -; RV32I-NEXT: and a2, t0, a2 -; RV32I-NEXT: bltz a7, .LBB11_5 +; RV32I-NEXT: sll a7, a0, a2 +; RV32I-NEXT: srai t0, a4, 31 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: andi a6, a4, 63 +; RV32I-NEXT: addi a5, a6, -32 +; RV32I-NEXT: and a2, t0, a7 +; RV32I-NEXT: bltz a5, 
.LBB11_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a0, a1, a5 +; RV32I-NEXT: srl a0, a1, a6 ; RV32I-NEXT: j .LBB11_6 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: srl a0, a0, a6 -; RV32I-NEXT: not a5, a5 -; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: sll a1, a1, a5 -; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: not a6, a6 +; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: or a0, a0, a6 ; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a3, a2 +; RV32I-NEXT: srl a1, a1, a4 +; RV32I-NEXT: srai a5, a5, 31 +; RV32I-NEXT: and a1, a5, a1 +; RV32I-NEXT: or a1, a3, a1 +; RV32I-NEXT: or a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: rotl_64_mask_and_127_and_63: @@ -946,30 +926,28 @@ ; RV32ZBB-NEXT: srl a3, a6, a3 ; RV32ZBB-NEXT: or a3, a5, a3 ; RV32ZBB-NEXT: .LBB11_3: -; RV32ZBB-NEXT: sll a5, a0, a2 -; RV32ZBB-NEXT: slti a4, a4, 0 -; RV32ZBB-NEXT: neg a4, a4 -; RV32ZBB-NEXT: and a4, a4, a5 -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: srl a2, a1, a6 -; RV32ZBB-NEXT: andi a5, a6, 63 -; RV32ZBB-NEXT: addi a7, a5, -32 -; RV32ZBB-NEXT: slti t0, a7, 0 -; RV32ZBB-NEXT: neg t0, t0 -; RV32ZBB-NEXT: and a2, t0, a2 -; RV32ZBB-NEXT: bltz a7, .LBB11_5 +; RV32ZBB-NEXT: sll a7, a0, a2 +; RV32ZBB-NEXT: srai t0, a4, 31 +; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: andi a6, a4, 63 +; RV32ZBB-NEXT: addi a5, a6, -32 +; RV32ZBB-NEXT: and a2, t0, a7 +; RV32ZBB-NEXT: bltz a5, .LBB11_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a0, a1, a5 +; RV32ZBB-NEXT: srl a0, a1, a6 ; RV32ZBB-NEXT: j .LBB11_6 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: srl a0, a0, a6 -; RV32ZBB-NEXT: not a5, a5 -; RV32ZBB-NEXT: slli a1, a1, 1 -; RV32ZBB-NEXT: sll a1, a1, a5 -; RV32ZBB-NEXT: or a0, a0, a1 +; RV32ZBB-NEXT: srl a0, a0, a4 +; RV32ZBB-NEXT: not a6, a6 +; RV32ZBB-NEXT: slli a7, a1, 1 +; RV32ZBB-NEXT: sll a6, a7, a6 +; RV32ZBB-NEXT: or a0, a0, a6 ; RV32ZBB-NEXT: .LBB11_6: -; RV32ZBB-NEXT: or a0, a4, a0 -; RV32ZBB-NEXT: or a1, a3, a2 +; RV32ZBB-NEXT: srl a1, a1, a4 +; RV32ZBB-NEXT: srai a5, a5, 31 +; RV32ZBB-NEXT: and a1, a5, a1 +; RV32ZBB-NEXT: or a1, a3, a1 +; RV32ZBB-NEXT: or a0, a2, a0 ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: rotl_64_mask_and_127_and_63: @@ -992,30 +970,28 @@ ; RV32XTHEADBB-NEXT: srl a3, a6, a3 ; RV32XTHEADBB-NEXT: or a3, a5, a3 ; RV32XTHEADBB-NEXT: .LBB11_3: -; RV32XTHEADBB-NEXT: sll a5, a0, a2 -; RV32XTHEADBB-NEXT: slti a4, a4, 0 -; RV32XTHEADBB-NEXT: neg a4, a4 -; RV32XTHEADBB-NEXT: and a4, a4, a5 -; RV32XTHEADBB-NEXT: neg a6, a2 -; RV32XTHEADBB-NEXT: srl a2, a1, a6 -; RV32XTHEADBB-NEXT: andi a5, a6, 63 -; RV32XTHEADBB-NEXT: addi a7, a5, -32 -; RV32XTHEADBB-NEXT: slti t0, a7, 0 -; RV32XTHEADBB-NEXT: neg t0, t0 -; RV32XTHEADBB-NEXT: and a2, t0, a2 -; RV32XTHEADBB-NEXT: bltz a7, .LBB11_5 +; RV32XTHEADBB-NEXT: sll a7, a0, a2 +; RV32XTHEADBB-NEXT: srai t0, a4, 31 +; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: andi a6, a4, 63 +; RV32XTHEADBB-NEXT: addi a5, a6, -32 +; RV32XTHEADBB-NEXT: and a2, t0, a7 +; RV32XTHEADBB-NEXT: bltz a5, .LBB11_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: srl a0, a1, a5 +; RV32XTHEADBB-NEXT: srl a0, a1, a6 ; RV32XTHEADBB-NEXT: j .LBB11_6 ; RV32XTHEADBB-NEXT: .LBB11_5: -; RV32XTHEADBB-NEXT: srl a0, a0, a6 -; RV32XTHEADBB-NEXT: not a5, a5 -; RV32XTHEADBB-NEXT: slli a1, a1, 1 -; RV32XTHEADBB-NEXT: sll a1, a1, a5 -; RV32XTHEADBB-NEXT: or a0, a0, a1 +; RV32XTHEADBB-NEXT: srl a0, a0, a4 +; RV32XTHEADBB-NEXT: not a6, a6 +; RV32XTHEADBB-NEXT: slli a7, a1, 1 +; RV32XTHEADBB-NEXT: sll a6, a7, a6 +; RV32XTHEADBB-NEXT: or a0, a0, 
a6 ; RV32XTHEADBB-NEXT: .LBB11_6: -; RV32XTHEADBB-NEXT: or a0, a4, a0 -; RV32XTHEADBB-NEXT: or a1, a3, a2 +; RV32XTHEADBB-NEXT: srl a1, a1, a4 +; RV32XTHEADBB-NEXT: srai a5, a5, 31 +; RV32XTHEADBB-NEXT: and a1, a5, a1 +; RV32XTHEADBB-NEXT: or a1, a3, a1 +; RV32XTHEADBB-NEXT: or a0, a2, a0 ; RV32XTHEADBB-NEXT: ret ; ; RV64XTHEADBB-LABEL: rotl_64_mask_and_127_and_63: @@ -1093,11 +1069,9 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64_mask: ; RV32I: # %bb.0: +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: srl a4, a1, a2 -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: slti a5, a3, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: bltz a3, .LBB13_2 +; RV32I-NEXT: bltz a5, .LBB13_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: j .LBB13_3 @@ -1108,24 +1082,24 @@ ; RV32I-NEXT: sll a6, a7, a6 ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB13_3: -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: andi t0, a6, 63 -; RV32I-NEXT: addi a7, t0, -32 -; RV32I-NEXT: and a2, a5, a4 -; RV32I-NEXT: bltz a7, .LBB13_5 +; RV32I-NEXT: srai t0, a5, 31 +; RV32I-NEXT: neg a5, a2 +; RV32I-NEXT: andi a7, a5, 63 +; RV32I-NEXT: addi a6, a7, -32 +; RV32I-NEXT: and a2, t0, a4 +; RV32I-NEXT: bltz a6, .LBB13_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a1, a0, t0 +; RV32I-NEXT: sll a1, a0, a7 ; RV32I-NEXT: j .LBB13_6 ; RV32I-NEXT: .LBB13_5: -; RV32I-NEXT: sll a1, a1, a6 -; RV32I-NEXT: not a4, t0 -; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: srl a4, a5, a4 +; RV32I-NEXT: sll a1, a1, a5 +; RV32I-NEXT: not a4, a7 +; RV32I-NEXT: srli a7, a0, 1 +; RV32I-NEXT: srl a4, a7, a4 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: .LBB13_6: -; RV32I-NEXT: sll a0, a0, a6 -; RV32I-NEXT: slti a4, a7, 0 -; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sll a0, a0, a5 +; RV32I-NEXT: srai a4, a6, 31 ; RV32I-NEXT: and a0, a4, a0 ; RV32I-NEXT: or a0, a3, a0 ; RV32I-NEXT: or a1, a2, a1 @@ -1141,11 +1115,9 @@ ; ; RV32ZBB-LABEL: rotr_64_mask: ; RV32ZBB: # %bb.0: +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: srl a4, a1, a2 -; RV32ZBB-NEXT: addi a3, a2, -32 -; RV32ZBB-NEXT: slti a5, a3, 0 -; RV32ZBB-NEXT: neg a5, a5 -; RV32ZBB-NEXT: bltz a3, .LBB13_2 +; RV32ZBB-NEXT: bltz a5, .LBB13_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 ; RV32ZBB-NEXT: j .LBB13_3 @@ -1156,24 +1128,24 @@ ; RV32ZBB-NEXT: sll a6, a7, a6 ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB13_3: -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: andi t0, a6, 63 -; RV32ZBB-NEXT: addi a7, t0, -32 -; RV32ZBB-NEXT: and a2, a5, a4 -; RV32ZBB-NEXT: bltz a7, .LBB13_5 +; RV32ZBB-NEXT: srai t0, a5, 31 +; RV32ZBB-NEXT: neg a5, a2 +; RV32ZBB-NEXT: andi a7, a5, 63 +; RV32ZBB-NEXT: addi a6, a7, -32 +; RV32ZBB-NEXT: and a2, t0, a4 +; RV32ZBB-NEXT: bltz a6, .LBB13_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a1, a0, t0 +; RV32ZBB-NEXT: sll a1, a0, a7 ; RV32ZBB-NEXT: j .LBB13_6 ; RV32ZBB-NEXT: .LBB13_5: -; RV32ZBB-NEXT: sll a1, a1, a6 -; RV32ZBB-NEXT: not a4, t0 -; RV32ZBB-NEXT: srli a5, a0, 1 -; RV32ZBB-NEXT: srl a4, a5, a4 +; RV32ZBB-NEXT: sll a1, a1, a5 +; RV32ZBB-NEXT: not a4, a7 +; RV32ZBB-NEXT: srli a7, a0, 1 +; RV32ZBB-NEXT: srl a4, a7, a4 ; RV32ZBB-NEXT: or a1, a1, a4 ; RV32ZBB-NEXT: .LBB13_6: -; RV32ZBB-NEXT: sll a0, a0, a6 -; RV32ZBB-NEXT: slti a4, a7, 0 -; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sll a0, a0, a5 +; RV32ZBB-NEXT: srai a4, a6, 31 ; RV32ZBB-NEXT: and a0, a4, a0 ; RV32ZBB-NEXT: or a0, a3, a0 ; RV32ZBB-NEXT: or a1, a2, a1 @@ -1186,11 +1158,9 @@ ; ; RV32XTHEADBB-LABEL: rotr_64_mask: ; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; 
RV32XTHEADBB-NEXT: srl a4, a1, a2 -; RV32XTHEADBB-NEXT: addi a3, a2, -32 -; RV32XTHEADBB-NEXT: slti a5, a3, 0 -; RV32XTHEADBB-NEXT: neg a5, a5 -; RV32XTHEADBB-NEXT: bltz a3, .LBB13_2 +; RV32XTHEADBB-NEXT: bltz a5, .LBB13_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 ; RV32XTHEADBB-NEXT: j .LBB13_3 @@ -1201,24 +1171,24 @@ ; RV32XTHEADBB-NEXT: sll a6, a7, a6 ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB13_3: -; RV32XTHEADBB-NEXT: neg a6, a2 -; RV32XTHEADBB-NEXT: andi t0, a6, 63 -; RV32XTHEADBB-NEXT: addi a7, t0, -32 -; RV32XTHEADBB-NEXT: and a2, a5, a4 -; RV32XTHEADBB-NEXT: bltz a7, .LBB13_5 +; RV32XTHEADBB-NEXT: srai t0, a5, 31 +; RV32XTHEADBB-NEXT: neg a5, a2 +; RV32XTHEADBB-NEXT: andi a7, a5, 63 +; RV32XTHEADBB-NEXT: addi a6, a7, -32 +; RV32XTHEADBB-NEXT: and a2, t0, a4 +; RV32XTHEADBB-NEXT: bltz a6, .LBB13_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: sll a1, a0, t0 +; RV32XTHEADBB-NEXT: sll a1, a0, a7 ; RV32XTHEADBB-NEXT: j .LBB13_6 ; RV32XTHEADBB-NEXT: .LBB13_5: -; RV32XTHEADBB-NEXT: sll a1, a1, a6 -; RV32XTHEADBB-NEXT: not a4, t0 -; RV32XTHEADBB-NEXT: srli a5, a0, 1 -; RV32XTHEADBB-NEXT: srl a4, a5, a4 +; RV32XTHEADBB-NEXT: sll a1, a1, a5 +; RV32XTHEADBB-NEXT: not a4, a7 +; RV32XTHEADBB-NEXT: srli a7, a0, 1 +; RV32XTHEADBB-NEXT: srl a4, a7, a4 ; RV32XTHEADBB-NEXT: or a1, a1, a4 ; RV32XTHEADBB-NEXT: .LBB13_6: -; RV32XTHEADBB-NEXT: sll a0, a0, a6 -; RV32XTHEADBB-NEXT: slti a4, a7, 0 -; RV32XTHEADBB-NEXT: neg a4, a4 +; RV32XTHEADBB-NEXT: sll a0, a0, a5 +; RV32XTHEADBB-NEXT: srai a4, a6, 31 ; RV32XTHEADBB-NEXT: and a0, a4, a0 ; RV32XTHEADBB-NEXT: or a0, a3, a0 ; RV32XTHEADBB-NEXT: or a1, a2, a1 @@ -1242,41 +1212,39 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64_mask_and_127_and_63: ; RV32I: # %bb.0: -; RV32I-NEXT: srl a4, a1, a2 ; RV32I-NEXT: andi a3, a2, 127 -; RV32I-NEXT: addi a6, a3, -32 -; RV32I-NEXT: slti a5, a6, 0 -; RV32I-NEXT: neg a5, a5 -; RV32I-NEXT: bltz a6, .LBB14_2 +; RV32I-NEXT: addi a4, a3, -32 +; RV32I-NEXT: bltz a4, .LBB14_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a3, a1, a3 ; RV32I-NEXT: j .LBB14_3 ; RV32I-NEXT: .LBB14_2: -; RV32I-NEXT: srl a6, a0, a2 -; RV32I-NEXT: slli a7, a1, 1 +; RV32I-NEXT: srl a5, a0, a2 +; RV32I-NEXT: slli a6, a1, 1 ; RV32I-NEXT: not a3, a3 -; RV32I-NEXT: sll a3, a7, a3 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: sll a3, a6, a3 +; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: .LBB14_3: -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: andi t0, a6, 63 -; RV32I-NEXT: addi a7, t0, -32 -; RV32I-NEXT: and a2, a5, a4 -; RV32I-NEXT: bltz a7, .LBB14_5 +; RV32I-NEXT: srl a7, a1, a2 +; RV32I-NEXT: srai t0, a4, 31 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: andi a6, a4, 63 +; RV32I-NEXT: addi a5, a6, -32 +; RV32I-NEXT: and a2, t0, a7 +; RV32I-NEXT: bltz a5, .LBB14_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a1, a0, t0 +; RV32I-NEXT: sll a1, a0, a6 ; RV32I-NEXT: j .LBB14_6 ; RV32I-NEXT: .LBB14_5: -; RV32I-NEXT: sll a1, a1, a6 -; RV32I-NEXT: not a4, t0 -; RV32I-NEXT: srli a5, a0, 1 -; RV32I-NEXT: srl a4, a5, a4 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: sll a1, a1, a4 +; RV32I-NEXT: not a6, a6 +; RV32I-NEXT: srli a7, a0, 1 +; RV32I-NEXT: srl a6, a7, a6 +; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: .LBB14_6: -; RV32I-NEXT: sll a0, a0, a6 -; RV32I-NEXT: slti a4, a7, 0 -; RV32I-NEXT: neg a4, a4 -; RV32I-NEXT: and a0, a4, a0 +; RV32I-NEXT: sll a0, a0, a4 +; RV32I-NEXT: srai a5, a5, 31 +; RV32I-NEXT: and a0, a5, a0 ; RV32I-NEXT: or a0, a3, a0 ; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: ret @@ -1291,41 
+1259,39 @@ ; ; RV32ZBB-LABEL: rotr_64_mask_and_127_and_63: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: srl a4, a1, a2 ; RV32ZBB-NEXT: andi a3, a2, 127 -; RV32ZBB-NEXT: addi a6, a3, -32 -; RV32ZBB-NEXT: slti a5, a6, 0 -; RV32ZBB-NEXT: neg a5, a5 -; RV32ZBB-NEXT: bltz a6, .LBB14_2 +; RV32ZBB-NEXT: addi a4, a3, -32 +; RV32ZBB-NEXT: bltz a4, .LBB14_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: srl a3, a1, a3 ; RV32ZBB-NEXT: j .LBB14_3 ; RV32ZBB-NEXT: .LBB14_2: -; RV32ZBB-NEXT: srl a6, a0, a2 -; RV32ZBB-NEXT: slli a7, a1, 1 +; RV32ZBB-NEXT: srl a5, a0, a2 +; RV32ZBB-NEXT: slli a6, a1, 1 ; RV32ZBB-NEXT: not a3, a3 -; RV32ZBB-NEXT: sll a3, a7, a3 -; RV32ZBB-NEXT: or a3, a6, a3 +; RV32ZBB-NEXT: sll a3, a6, a3 +; RV32ZBB-NEXT: or a3, a5, a3 ; RV32ZBB-NEXT: .LBB14_3: -; RV32ZBB-NEXT: neg a6, a2 -; RV32ZBB-NEXT: andi t0, a6, 63 -; RV32ZBB-NEXT: addi a7, t0, -32 -; RV32ZBB-NEXT: and a2, a5, a4 -; RV32ZBB-NEXT: bltz a7, .LBB14_5 +; RV32ZBB-NEXT: srl a7, a1, a2 +; RV32ZBB-NEXT: srai t0, a4, 31 +; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: andi a6, a4, 63 +; RV32ZBB-NEXT: addi a5, a6, -32 +; RV32ZBB-NEXT: and a2, t0, a7 +; RV32ZBB-NEXT: bltz a5, .LBB14_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a1, a0, t0 +; RV32ZBB-NEXT: sll a1, a0, a6 ; RV32ZBB-NEXT: j .LBB14_6 ; RV32ZBB-NEXT: .LBB14_5: -; RV32ZBB-NEXT: sll a1, a1, a6 -; RV32ZBB-NEXT: not a4, t0 -; RV32ZBB-NEXT: srli a5, a0, 1 -; RV32ZBB-NEXT: srl a4, a5, a4 -; RV32ZBB-NEXT: or a1, a1, a4 +; RV32ZBB-NEXT: sll a1, a1, a4 +; RV32ZBB-NEXT: not a6, a6 +; RV32ZBB-NEXT: srli a7, a0, 1 +; RV32ZBB-NEXT: srl a6, a7, a6 +; RV32ZBB-NEXT: or a1, a1, a6 ; RV32ZBB-NEXT: .LBB14_6: -; RV32ZBB-NEXT: sll a0, a0, a6 -; RV32ZBB-NEXT: slti a4, a7, 0 -; RV32ZBB-NEXT: neg a4, a4 -; RV32ZBB-NEXT: and a0, a4, a0 +; RV32ZBB-NEXT: sll a0, a0, a4 +; RV32ZBB-NEXT: srai a5, a5, 31 +; RV32ZBB-NEXT: and a0, a5, a0 ; RV32ZBB-NEXT: or a0, a3, a0 ; RV32ZBB-NEXT: or a1, a2, a1 ; RV32ZBB-NEXT: ret @@ -1337,41 +1303,39 @@ ; ; RV32XTHEADBB-LABEL: rotr_64_mask_and_127_and_63: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: srl a4, a1, a2 ; RV32XTHEADBB-NEXT: andi a3, a2, 127 -; RV32XTHEADBB-NEXT: addi a6, a3, -32 -; RV32XTHEADBB-NEXT: slti a5, a6, 0 -; RV32XTHEADBB-NEXT: neg a5, a5 -; RV32XTHEADBB-NEXT: bltz a6, .LBB14_2 +; RV32XTHEADBB-NEXT: addi a4, a3, -32 +; RV32XTHEADBB-NEXT: bltz a4, .LBB14_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: srl a3, a1, a3 ; RV32XTHEADBB-NEXT: j .LBB14_3 ; RV32XTHEADBB-NEXT: .LBB14_2: -; RV32XTHEADBB-NEXT: srl a6, a0, a2 -; RV32XTHEADBB-NEXT: slli a7, a1, 1 +; RV32XTHEADBB-NEXT: srl a5, a0, a2 +; RV32XTHEADBB-NEXT: slli a6, a1, 1 ; RV32XTHEADBB-NEXT: not a3, a3 -; RV32XTHEADBB-NEXT: sll a3, a7, a3 -; RV32XTHEADBB-NEXT: or a3, a6, a3 +; RV32XTHEADBB-NEXT: sll a3, a6, a3 +; RV32XTHEADBB-NEXT: or a3, a5, a3 ; RV32XTHEADBB-NEXT: .LBB14_3: -; RV32XTHEADBB-NEXT: neg a6, a2 -; RV32XTHEADBB-NEXT: andi t0, a6, 63 -; RV32XTHEADBB-NEXT: addi a7, t0, -32 -; RV32XTHEADBB-NEXT: and a2, a5, a4 -; RV32XTHEADBB-NEXT: bltz a7, .LBB14_5 +; RV32XTHEADBB-NEXT: srl a7, a1, a2 +; RV32XTHEADBB-NEXT: srai t0, a4, 31 +; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: andi a6, a4, 63 +; RV32XTHEADBB-NEXT: addi a5, a6, -32 +; RV32XTHEADBB-NEXT: and a2, t0, a7 +; RV32XTHEADBB-NEXT: bltz a5, .LBB14_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: sll a1, a0, t0 +; RV32XTHEADBB-NEXT: sll a1, a0, a6 ; RV32XTHEADBB-NEXT: j .LBB14_6 ; RV32XTHEADBB-NEXT: .LBB14_5: -; RV32XTHEADBB-NEXT: sll a1, a1, a6 -; RV32XTHEADBB-NEXT: not a4, t0 -; RV32XTHEADBB-NEXT: srli a5, a0, 1 -; 
RV32XTHEADBB-NEXT: srl a4, a5, a4 -; RV32XTHEADBB-NEXT: or a1, a1, a4 +; RV32XTHEADBB-NEXT: sll a1, a1, a4 +; RV32XTHEADBB-NEXT: not a6, a6 +; RV32XTHEADBB-NEXT: srli a7, a0, 1 +; RV32XTHEADBB-NEXT: srl a6, a7, a6 +; RV32XTHEADBB-NEXT: or a1, a1, a6 ; RV32XTHEADBB-NEXT: .LBB14_6: -; RV32XTHEADBB-NEXT: sll a0, a0, a6 -; RV32XTHEADBB-NEXT: slti a4, a7, 0 -; RV32XTHEADBB-NEXT: neg a4, a4 -; RV32XTHEADBB-NEXT: and a0, a4, a0 +; RV32XTHEADBB-NEXT: sll a0, a0, a4 +; RV32XTHEADBB-NEXT: srai a5, a5, 31 +; RV32XTHEADBB-NEXT: and a0, a5, a0 ; RV32XTHEADBB-NEXT: or a0, a3, a0 ; RV32XTHEADBB-NEXT: or a1, a2, a1 ; RV32XTHEADBB-NEXT: ret @@ -1550,8 +1514,7 @@ ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB17_7: ; RV32I-NEXT: sll a2, a2, a4 -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: add a0, a1, a0 ; RV32I-NEXT: sltu a1, a0, a1 @@ -1605,8 +1568,7 @@ ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB17_7: ; RV32ZBB-NEXT: sll a2, a2, a4 -; RV32ZBB-NEXT: slti a0, a0, 0 -; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: srai a0, a0, 31 ; RV32ZBB-NEXT: and a0, a0, a2 ; RV32ZBB-NEXT: add a0, a1, a0 ; RV32ZBB-NEXT: sltu a1, a0, a1 @@ -1656,8 +1618,7 @@ ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB17_7: ; RV32XTHEADBB-NEXT: sll a2, a2, a4 -; RV32XTHEADBB-NEXT: slti a0, a0, 0 -; RV32XTHEADBB-NEXT: neg a0, a0 +; RV32XTHEADBB-NEXT: srai a0, a0, 31 ; RV32XTHEADBB-NEXT: and a0, a0, a2 ; RV32XTHEADBB-NEXT: add a0, a1, a0 ; RV32XTHEADBB-NEXT: sltu a1, a0, a1 @@ -1782,8 +1743,7 @@ ; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: .LBB19_7: ; RV32I-NEXT: sll a2, a2, a4 -; RV32I-NEXT: slti a0, a0, 0 -; RV32I-NEXT: neg a0, a0 +; RV32I-NEXT: srai a0, a0, 31 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: add a0, a6, a0 ; RV32I-NEXT: sltu a2, a0, a6 @@ -1836,8 +1796,7 @@ ; RV32ZBB-NEXT: or a3, a3, a5 ; RV32ZBB-NEXT: .LBB19_7: ; RV32ZBB-NEXT: sll a2, a2, a4 -; RV32ZBB-NEXT: slti a0, a0, 0 -; RV32ZBB-NEXT: neg a0, a0 +; RV32ZBB-NEXT: srai a0, a0, 31 ; RV32ZBB-NEXT: and a0, a0, a2 ; RV32ZBB-NEXT: add a0, a6, a0 ; RV32ZBB-NEXT: sltu a2, a0, a6 @@ -1887,8 +1846,7 @@ ; RV32XTHEADBB-NEXT: or a3, a3, a5 ; RV32XTHEADBB-NEXT: .LBB19_7: ; RV32XTHEADBB-NEXT: sll a2, a2, a4 -; RV32XTHEADBB-NEXT: slti a0, a0, 0 -; RV32XTHEADBB-NEXT: neg a0, a0 +; RV32XTHEADBB-NEXT: srai a0, a0, 31 ; RV32XTHEADBB-NEXT: and a0, a0, a2 ; RV32XTHEADBB-NEXT: add a0, a6, a0 ; RV32XTHEADBB-NEXT: sltu a2, a0, a6 @@ -2404,11 +2362,9 @@ ; RV32I-LABEL: rotl_64_zext: ; RV32I: # %bb.0: ; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: addi a6, a2, -32 ; RV32I-NEXT: sll a5, a0, a2 -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: slti a6, a3, 0 -; RV32I-NEXT: neg a6, a6 -; RV32I-NEXT: bltz a3, .LBB24_2 +; RV32I-NEXT: bltz a6, .LBB24_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: j .LBB24_3 @@ -2419,13 +2375,14 @@ ; RV32I-NEXT: srl a7, t0, a7 ; RV32I-NEXT: or a3, a3, a7 ; RV32I-NEXT: .LBB24_3: +; RV32I-NEXT: srai a6, a6, 31 ; RV32I-NEXT: and a5, a6, a5 ; RV32I-NEXT: li a6, 32 -; RV32I-NEXT: sub a7, a6, a2 -; RV32I-NEXT: srl a6, a1, a4 -; RV32I-NEXT: bltz a7, .LBB24_5 +; RV32I-NEXT: sub a6, a6, a2 +; RV32I-NEXT: srl a7, a1, a4 +; RV32I-NEXT: bltz a6, .LBB24_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a0, a7 ; RV32I-NEXT: j .LBB24_6 ; RV32I-NEXT: .LBB24_5: ; RV32I-NEXT: li t0, 64 @@ -2436,9 +2393,8 @@ ; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: or a0, a0, a1 ; RV32I-NEXT: .LBB24_6: -; RV32I-NEXT: slti a1, a7, 0 -; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: and a1, a1, a6 
+; RV32I-NEXT: srai a1, a6, 31
+; RV32I-NEXT: and a1, a1, a7
 ; RV32I-NEXT: or a1, a3, a1
 ; RV32I-NEXT: or a0, a5, a0
 ; RV32I-NEXT: ret
@@ -2454,11 +2410,9 @@
 ; RV32ZBB-LABEL: rotl_64_zext:
 ; RV32ZBB: # %bb.0:
 ; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: addi a6, a2, -32
 ; RV32ZBB-NEXT: sll a5, a0, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a6, a3, 0
-; RV32ZBB-NEXT: neg a6, a6
-; RV32ZBB-NEXT: bltz a3, .LBB24_2
+; RV32ZBB-NEXT: bltz a6, .LBB24_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a5
 ; RV32ZBB-NEXT: j .LBB24_3
@@ -2469,13 +2423,14 @@
 ; RV32ZBB-NEXT: srl a7, t0, a7
 ; RV32ZBB-NEXT: or a3, a3, a7
 ; RV32ZBB-NEXT: .LBB24_3:
+; RV32ZBB-NEXT: srai a6, a6, 31
 ; RV32ZBB-NEXT: and a5, a6, a5
 ; RV32ZBB-NEXT: li a6, 32
-; RV32ZBB-NEXT: sub a7, a6, a2
-; RV32ZBB-NEXT: srl a6, a1, a4
-; RV32ZBB-NEXT: bltz a7, .LBB24_5
+; RV32ZBB-NEXT: sub a6, a6, a2
+; RV32ZBB-NEXT: srl a7, a1, a4
+; RV32ZBB-NEXT: bltz a6, .LBB24_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: mv a0, a6
+; RV32ZBB-NEXT: mv a0, a7
 ; RV32ZBB-NEXT: j .LBB24_6
 ; RV32ZBB-NEXT: .LBB24_5:
 ; RV32ZBB-NEXT: li t0, 64
@@ -2486,9 +2441,8 @@
 ; RV32ZBB-NEXT: sll a1, a1, a2
 ; RV32ZBB-NEXT: or a0, a0, a1
 ; RV32ZBB-NEXT: .LBB24_6:
-; RV32ZBB-NEXT: slti a1, a7, 0
-; RV32ZBB-NEXT: neg a1, a1
-; RV32ZBB-NEXT: and a1, a1, a6
+; RV32ZBB-NEXT: srai a1, a6, 31
+; RV32ZBB-NEXT: and a1, a1, a7
 ; RV32ZBB-NEXT: or a1, a3, a1
 ; RV32ZBB-NEXT: or a0, a5, a0
 ; RV32ZBB-NEXT: ret
@@ -2501,11 +2455,9 @@
 ; RV32XTHEADBB-LABEL: rotl_64_zext:
 ; RV32XTHEADBB: # %bb.0:
 ; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: addi a6, a2, -32
 ; RV32XTHEADBB-NEXT: sll a5, a0, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a6, a3, 0
-; RV32XTHEADBB-NEXT: neg a6, a6
-; RV32XTHEADBB-NEXT: bltz a3, .LBB24_2
+; RV32XTHEADBB-NEXT: bltz a6, .LBB24_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a5
 ; RV32XTHEADBB-NEXT: j .LBB24_3
@@ -2516,13 +2468,14 @@
 ; RV32XTHEADBB-NEXT: srl a7, t0, a7
 ; RV32XTHEADBB-NEXT: or a3, a3, a7
 ; RV32XTHEADBB-NEXT: .LBB24_3:
+; RV32XTHEADBB-NEXT: srai a6, a6, 31
 ; RV32XTHEADBB-NEXT: and a5, a6, a5
 ; RV32XTHEADBB-NEXT: li a6, 32
-; RV32XTHEADBB-NEXT: sub a7, a6, a2
-; RV32XTHEADBB-NEXT: srl a6, a1, a4
-; RV32XTHEADBB-NEXT: bltz a7, .LBB24_5
+; RV32XTHEADBB-NEXT: sub a6, a6, a2
+; RV32XTHEADBB-NEXT: srl a7, a1, a4
+; RV32XTHEADBB-NEXT: bltz a6, .LBB24_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: mv a0, a6
+; RV32XTHEADBB-NEXT: mv a0, a7
 ; RV32XTHEADBB-NEXT: j .LBB24_6
 ; RV32XTHEADBB-NEXT: .LBB24_5:
 ; RV32XTHEADBB-NEXT: li t0, 64
@@ -2533,9 +2486,8 @@
 ; RV32XTHEADBB-NEXT: sll a1, a1, a2
 ; RV32XTHEADBB-NEXT: or a0, a0, a1
 ; RV32XTHEADBB-NEXT: .LBB24_6:
-; RV32XTHEADBB-NEXT: slti a1, a7, 0
-; RV32XTHEADBB-NEXT: neg a1, a1
-; RV32XTHEADBB-NEXT: and a1, a1, a6
+; RV32XTHEADBB-NEXT: srai a1, a6, 31
+; RV32XTHEADBB-NEXT: and a1, a1, a7
 ; RV32XTHEADBB-NEXT: or a1, a3, a1
 ; RV32XTHEADBB-NEXT: or a0, a5, a0
 ; RV32XTHEADBB-NEXT: ret
@@ -2560,11 +2512,9 @@
 ; RV32I-LABEL: rotr_64_zext:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: neg a4, a2
+; RV32I-NEXT: addi a6, a2, -32
 ; RV32I-NEXT: srl a5, a1, a2
-; RV32I-NEXT: addi a3, a2, -32
-; RV32I-NEXT: slti a6, a3, 0
-; RV32I-NEXT: neg a6, a6
-; RV32I-NEXT: bltz a3, .LBB25_2
+; RV32I-NEXT: bltz a6, .LBB25_2
 ; RV32I-NEXT: # %bb.1:
 ; RV32I-NEXT: mv a3, a5
 ; RV32I-NEXT: j .LBB25_3
@@ -2575,13 +2525,14 @@
 ; RV32I-NEXT: sll a7, t0, a7
 ; RV32I-NEXT: or a3, a3, a7
 ; RV32I-NEXT: .LBB25_3:
+; RV32I-NEXT: srai a6, a6, 31
 ; RV32I-NEXT: and a5, a6, a5
 ; RV32I-NEXT: li a6, 32
-; RV32I-NEXT: sub a7, a6, a2
-; RV32I-NEXT: sll a6, a0, a4
-; RV32I-NEXT: bltz a7, .LBB25_5
+; RV32I-NEXT: sub a6, a6, a2
+; RV32I-NEXT: sll a7, a0, a4
+; RV32I-NEXT: bltz a6, .LBB25_5
 ; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: mv a1, a6
+; RV32I-NEXT: mv a1, a7
 ; RV32I-NEXT: j .LBB25_6
 ; RV32I-NEXT: .LBB25_5:
 ; RV32I-NEXT: li t0, 64
@@ -2592,9 +2543,8 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB25_6:
-; RV32I-NEXT: slti a0, a7, 0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, a0, a6
+; RV32I-NEXT: srai a0, a6, 31
+; RV32I-NEXT: and a0, a0, a7
 ; RV32I-NEXT: or a0, a3, a0
 ; RV32I-NEXT: or a1, a5, a1
 ; RV32I-NEXT: ret
@@ -2610,11 +2560,9 @@
 ; RV32ZBB-LABEL: rotr_64_zext:
 ; RV32ZBB: # %bb.0:
 ; RV32ZBB-NEXT: neg a4, a2
+; RV32ZBB-NEXT: addi a6, a2, -32
 ; RV32ZBB-NEXT: srl a5, a1, a2
-; RV32ZBB-NEXT: addi a3, a2, -32
-; RV32ZBB-NEXT: slti a6, a3, 0
-; RV32ZBB-NEXT: neg a6, a6
-; RV32ZBB-NEXT: bltz a3, .LBB25_2
+; RV32ZBB-NEXT: bltz a6, .LBB25_2
 ; RV32ZBB-NEXT: # %bb.1:
 ; RV32ZBB-NEXT: mv a3, a5
 ; RV32ZBB-NEXT: j .LBB25_3
@@ -2625,13 +2573,14 @@
 ; RV32ZBB-NEXT: sll a7, t0, a7
 ; RV32ZBB-NEXT: or a3, a3, a7
 ; RV32ZBB-NEXT: .LBB25_3:
+; RV32ZBB-NEXT: srai a6, a6, 31
 ; RV32ZBB-NEXT: and a5, a6, a5
 ; RV32ZBB-NEXT: li a6, 32
-; RV32ZBB-NEXT: sub a7, a6, a2
-; RV32ZBB-NEXT: sll a6, a0, a4
-; RV32ZBB-NEXT: bltz a7, .LBB25_5
+; RV32ZBB-NEXT: sub a6, a6, a2
+; RV32ZBB-NEXT: sll a7, a0, a4
+; RV32ZBB-NEXT: bltz a6, .LBB25_5
 ; RV32ZBB-NEXT: # %bb.4:
-; RV32ZBB-NEXT: mv a1, a6
+; RV32ZBB-NEXT: mv a1, a7
 ; RV32ZBB-NEXT: j .LBB25_6
 ; RV32ZBB-NEXT: .LBB25_5:
 ; RV32ZBB-NEXT: li t0, 64
@@ -2642,9 +2591,8 @@
 ; RV32ZBB-NEXT: srl a0, a0, a2
 ; RV32ZBB-NEXT: or a1, a1, a0
 ; RV32ZBB-NEXT: .LBB25_6:
-; RV32ZBB-NEXT: slti a0, a7, 0
-; RV32ZBB-NEXT: neg a0, a0
-; RV32ZBB-NEXT: and a0, a0, a6
+; RV32ZBB-NEXT: srai a0, a6, 31
+; RV32ZBB-NEXT: and a0, a0, a7
 ; RV32ZBB-NEXT: or a0, a3, a0
 ; RV32ZBB-NEXT: or a1, a5, a1
 ; RV32ZBB-NEXT: ret
@@ -2657,11 +2605,9 @@
 ; RV32XTHEADBB-LABEL: rotr_64_zext:
 ; RV32XTHEADBB: # %bb.0:
 ; RV32XTHEADBB-NEXT: neg a4, a2
+; RV32XTHEADBB-NEXT: addi a6, a2, -32
 ; RV32XTHEADBB-NEXT: srl a5, a1, a2
-; RV32XTHEADBB-NEXT: addi a3, a2, -32
-; RV32XTHEADBB-NEXT: slti a6, a3, 0
-; RV32XTHEADBB-NEXT: neg a6, a6
-; RV32XTHEADBB-NEXT: bltz a3, .LBB25_2
+; RV32XTHEADBB-NEXT: bltz a6, .LBB25_2
 ; RV32XTHEADBB-NEXT: # %bb.1:
 ; RV32XTHEADBB-NEXT: mv a3, a5
 ; RV32XTHEADBB-NEXT: j .LBB25_3
@@ -2672,13 +2618,14 @@
 ; RV32XTHEADBB-NEXT: sll a7, t0, a7
 ; RV32XTHEADBB-NEXT: or a3, a3, a7
 ; RV32XTHEADBB-NEXT: .LBB25_3:
+; RV32XTHEADBB-NEXT: srai a6, a6, 31
 ; RV32XTHEADBB-NEXT: and a5, a6, a5
 ; RV32XTHEADBB-NEXT: li a6, 32
-; RV32XTHEADBB-NEXT: sub a7, a6, a2
-; RV32XTHEADBB-NEXT: sll a6, a0, a4
-; RV32XTHEADBB-NEXT: bltz a7, .LBB25_5
+; RV32XTHEADBB-NEXT: sub a6, a6, a2
+; RV32XTHEADBB-NEXT: sll a7, a0, a4
+; RV32XTHEADBB-NEXT: bltz a6, .LBB25_5
 ; RV32XTHEADBB-NEXT: # %bb.4:
-; RV32XTHEADBB-NEXT: mv a1, a6
+; RV32XTHEADBB-NEXT: mv a1, a7
 ; RV32XTHEADBB-NEXT: j .LBB25_6
 ; RV32XTHEADBB-NEXT: .LBB25_5:
 ; RV32XTHEADBB-NEXT: li t0, 64
@@ -2689,9 +2636,8 @@
 ; RV32XTHEADBB-NEXT: srl a0, a0, a2
 ; RV32XTHEADBB-NEXT: or a1, a1, a0
 ; RV32XTHEADBB-NEXT: .LBB25_6:
-; RV32XTHEADBB-NEXT: slti a0, a7, 0
-; RV32XTHEADBB-NEXT: neg a0, a0
-; RV32XTHEADBB-NEXT: and a0, a0, a6
+; RV32XTHEADBB-NEXT: srai a0, a6, 31
+; RV32XTHEADBB-NEXT: and a0, a0, a7
 ; RV32XTHEADBB-NEXT: or a0, a3, a0
 ; RV32XTHEADBB-NEXT: or a1, a5, a1
 ; RV32XTHEADBB-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
--- a/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/RISCV/selectcc-to-shiftand.ll
@@ -78,15 +78,13 @@
 define i32 @pos_sel_constants(i32 signext %a) {
 ; RV32-LABEL: pos_sel_constants:
 ; RV32: # %bb.0:
-; RV32-NEXT: slti a0, a0, 0
-; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: srai a0, a0, 31
 ; RV32-NEXT: andi a0, a0, 5
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: pos_sel_constants:
 ; RV64: # %bb.0:
-; RV64-NEXT: slti a0, a0, 0
-; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: srai a0, a0, 63
 ; RV64-NEXT: andi a0, a0, 5
 ; RV64-NEXT: ret
 %tmp.1 = icmp sgt i32 %a, -1
@@ -121,15 +119,13 @@
 define i32 @pos_sel_variable_and_zero(i32 signext %a, i32 signext %b) {
 ; RV32I-LABEL: pos_sel_variable_and_zero:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: slti a0, a0, 0
-; RV32I-NEXT: addi a0, a0, -1
+; RV32I-NEXT: srai a0, a0, 31
 ; RV32I-NEXT: and a0, a0, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: pos_sel_variable_and_zero:
 ; RV64I: # %bb.0:
-; RV64I-NEXT: slti a0, a0, 0
-; RV64I-NEXT: addi a0, a0, -1
+; RV64I-NEXT: srai a0, a0, 63
 ; RV64I-NEXT: and a0, a0, a1
 ; RV64I-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll
--- a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll
@@ -25,12 +25,12 @@
 ; RV32I-LABEL: shl_by_complemented_64:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: not a4, a0
-; RV32I-NEXT: li a3, 31
-; RV32I-NEXT: sub a3, a3, a0
-; RV32I-NEXT: sll a2, a0, a4
-; RV32I-NEXT: bltz a3, .LBB1_2
+; RV32I-NEXT: li a2, 31
+; RV32I-NEXT: sub a2, a2, a0
+; RV32I-NEXT: sll a3, a0, a4
+; RV32I-NEXT: bltz a2, .LBB1_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a1, a2
+; RV32I-NEXT: mv a1, a3
 ; RV32I-NEXT: j .LBB1_3
 ; RV32I-NEXT: .LBB1_2:
 ; RV32I-NEXT: sll a1, a1, a4
@@ -41,9 +41,8 @@
 ; RV32I-NEXT: srl a0, a0, a4
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB1_3:
-; RV32I-NEXT: slti a0, a3, 0
-; RV32I-NEXT: neg a0, a0
-; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: srai a0, a2, 31
+; RV32I-NEXT: and a0, a0, a3
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: shl_by_complemented_64:
@@ -77,12 +76,12 @@
 ; RV32I-LABEL: lshr_by_complemented_64:
 ; RV32I: # %bb.0:
 ; RV32I-NEXT: not a4, a0
-; RV32I-NEXT: li a3, 31
-; RV32I-NEXT: sub a3, a3, a0
-; RV32I-NEXT: srl a2, a1, a4
-; RV32I-NEXT: bltz a3, .LBB3_2
+; RV32I-NEXT: li a2, 31
+; RV32I-NEXT: sub a2, a2, a0
+; RV32I-NEXT: srl a3, a1, a4
+; RV32I-NEXT: bltz a2, .LBB3_2
 ; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: mv a0, a3
 ; RV32I-NEXT: j .LBB3_3
 ; RV32I-NEXT: .LBB3_2:
 ; RV32I-NEXT: srl a4, a0, a4
@@ -93,9 +92,8 @@
 ; RV32I-NEXT: sll a0, a1, a0
 ; RV32I-NEXT: or a0, a4, a0
 ; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: slti a1, a3, 0
-; RV32I-NEXT: neg a1, a1
-; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: srai a1, a2, 31
+; RV32I-NEXT: and a1, a1, a3
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: lshr_by_complemented_64:
@@ -197,8 +195,7 @@
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: .LBB7_3:
 ; RV32I-NEXT: sll a0, a0, a3
-; RV32I-NEXT: slti a2, a2, 0
-; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: srai a2, a2, 31
 ; RV32I-NEXT: and a0, a2, a0
 ; RV32I-NEXT: ret
 ;
@@ -233,8 +230,7 @@
 ; RV32I-NEXT: or a0, a0, a4
 ; RV32I-NEXT: .LBB8_3:
 ; RV32I-NEXT: srl a1, a1, a3
-; RV32I-NEXT: slti a2, a2, 0
-; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: srai a2, a2, 31
 ; RV32I-NEXT: and a1, a2, a1
 ; RV32I-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
--- a/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
+++ b/llvm/test/CodeGen/RISCV/shift-masked-shamt.ll
@@ -175,9 +175,8 @@
 ; RV32I-NEXT: or a1, a1, a4
 ; RV32I-NEXT: .LBB9_3:
 ; RV32I-NEXT: sll a0, a0, a2
-; RV32I-NEXT: slti a2, a3, 0
-; RV32I-NEXT: neg a2, a2
-; RV32I-NEXT: and a0, a2, a0
+; RV32I-NEXT: srai a3, a3, 31
+; RV32I-NEXT: and a0, a3, a0
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: sll_redundant_mask_zeros_i64:
@@ -209,9 +208,8 @@
 ; RV32I-NEXT: or a0, a0, a4
 ; RV32I-NEXT: .LBB10_3:
 ; RV32I-NEXT: srl a1, a1, a2
-; RV32I-NEXT: slti a2, a3, 0
-; RV32I-NEXT: neg a2, a2
-; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: srai a3, a3, 31
+; RV32I-NEXT: and a1, a3, a1
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: srl_redundant_mask_zeros_i64:
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -26,8 +26,7 @@
 ; RV32I-NEXT: sll a1, a1, a2
 ; RV32I-NEXT: or a0, a0, a1
 ; RV32I-NEXT: .LBB0_3:
-; RV32I-NEXT: slti a1, a4, 0
-; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: srai a1, a4, 31
 ; RV32I-NEXT: and a1, a1, a3
 ; RV32I-NEXT: ret
 ;
@@ -119,8 +118,7 @@
 ; RV32I-NEXT: srl a0, a0, a2
 ; RV32I-NEXT: or a1, a1, a0
 ; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: slti a0, a4, 0
-; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: srai a0, a4, 31
 ; RV32I-NEXT: and a0, a0, a3
 ; RV32I-NEXT: ret
 ;
@@ -285,8 +283,7 @@
 ; RV64I-NEXT: sll a1, a1, a2
 ; RV64I-NEXT: or a0, a0, a1
 ; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: slti a1, a4, 0
-; RV64I-NEXT: neg a1, a1
+; RV64I-NEXT: srai a1, a4, 63
 ; RV64I-NEXT: and a1, a1, a3
 ; RV64I-NEXT: ret
 %1 = lshr i128 %a, %b
@@ -574,8 +571,7 @@
 ; RV64I-NEXT: srl a0, a0, a2
 ; RV64I-NEXT: or a1, a1, a0
 ; RV64I-NEXT: .LBB8_3:
-; RV64I-NEXT: slti a0, a4, 0
-; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: srai a0, a4, 63
 ; RV64I-NEXT: and a0, a0, a3
 ; RV64I-NEXT: ret
 %1 = shl i128 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -307,17 +307,16 @@
 ; RV32I-NEXT: sll a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a3, a1, 16
 ; RV32I-NEXT: sb a3, 6(a2)
 ; RV32I-NEXT: srli a3, a1, 24
 ; RV32I-NEXT: sb a3, 7(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 5(a2)
+; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a1, a0, 16
 ; RV32I-NEXT: sb a1, 2(a2)
 ; RV32I-NEXT: srli a1, a0, 24
@@ -444,23 +443,22 @@
 ; RV32I-NEXT: srl a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: srli a3, a0, 16
-; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
-; RV32I-NEXT: srli a0, a1, 16
-; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a1, 24
-; RV32I-NEXT: sb a0, 3(a2)
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: sb a3, 3(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: sb a1, 6(a2)
+; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: sb a1, 7(a2)
+; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: sb a0, 5(a2)
 ; RV32I-NEXT: ret
 %src = load i64, ptr %src.ptr, align 1
 %byteOff = load i64, ptr %byteOff.ptr, align 1
@@ -689,11 +687,9 @@
 ; RV64I-NEXT: sll a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 8(a2)
-; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a3, a1, 56
 ; RV64I-NEXT: sb a3, 15(a2)
 ; RV64I-NEXT: srli a3, a1, 48
@@ -708,6 +704,7 @@
 ; RV64I-NEXT: sb a3, 10(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 9(a2)
+; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a1, a0, 56
 ; RV64I-NEXT: sb a1, 7(a2)
 ; RV64I-NEXT: srli a1, a0, 48
@@ -909,39 +906,38 @@
 ; RV64I-NEXT: srl a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB7_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
-; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a0, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a0, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a0, 32
-; RV64I-NEXT: sb a3, 12(a2)
-; RV64I-NEXT: srli a3, a0, 24
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a0, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
-; RV64I-NEXT: srli a0, a1, 56
-; RV64I-NEXT: sb a0, 7(a2)
-; RV64I-NEXT: srli a0, a1, 48
-; RV64I-NEXT: sb a0, 6(a2)
-; RV64I-NEXT: srli a0, a1, 40
-; RV64I-NEXT: sb a0, 5(a2)
-; RV64I-NEXT: srli a0, a1, 32
-; RV64I-NEXT: sb a0, 4(a2)
-; RV64I-NEXT: srli a0, a1, 24
-; RV64I-NEXT: sb a0, 3(a2)
-; RV64I-NEXT: srli a0, a1, 16
-; RV64I-NEXT: sb a0, 2(a2)
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 48
+; RV64I-NEXT: sb a3, 6(a2)
+; RV64I-NEXT: srli a3, a1, 40
+; RV64I-NEXT: sb a3, 5(a2)
+; RV64I-NEXT: srli a3, a1, 32
+; RV64I-NEXT: sb a3, 4(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: sb a3, 3(a2)
+; RV64I-NEXT: srli a3, a1, 16
+; RV64I-NEXT: sb a3, 2(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 1(a2)
+; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: sb a1, 15(a2)
+; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: sb a1, 14(a2)
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: sb a1, 13(a2)
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: sb a1, 12(a2)
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: sb a1, 11(a2)
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: sb a1, 10(a2)
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: sb a0, 9(a2)
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: shl_16bytes:
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -296,17 +296,16 @@
 ; RV32I-NEXT: sll a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB3_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 4(a2)
-; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a3, a1, 16
 ; RV32I-NEXT: sb a3, 6(a2)
 ; RV32I-NEXT: srli a3, a1, 24
 ; RV32I-NEXT: sb a3, 7(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 5(a2)
+; RV32I-NEXT: sb a0, 0(a2)
 ; RV32I-NEXT: srli a1, a0, 16
 ; RV32I-NEXT: sb a1, 2(a2)
 ; RV32I-NEXT: srli a1, a0, 24
@@ -430,23 +429,22 @@
 ; RV32I-NEXT: srl a3, a3, a5
 ; RV32I-NEXT: or a0, a0, a3
 ; RV32I-NEXT: .LBB4_3:
-; RV32I-NEXT: slti a3, a4, 0
-; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: and a1, a3, a1
-; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srai a4, a4, 31
+; RV32I-NEXT: and a1, a4, a1
 ; RV32I-NEXT: sb a1, 0(a2)
-; RV32I-NEXT: srli a3, a0, 16
-; RV32I-NEXT: sb a3, 6(a2)
-; RV32I-NEXT: srli a3, a0, 24
-; RV32I-NEXT: sb a3, 7(a2)
-; RV32I-NEXT: srli a0, a0, 8
-; RV32I-NEXT: sb a0, 5(a2)
-; RV32I-NEXT: srli a0, a1, 16
-; RV32I-NEXT: sb a0, 2(a2)
-; RV32I-NEXT: srli a0, a1, 24
-; RV32I-NEXT: sb a0, 3(a2)
+; RV32I-NEXT: sb a0, 4(a2)
+; RV32I-NEXT: srli a3, a1, 16
+; RV32I-NEXT: sb a3, 2(a2)
+; RV32I-NEXT: srli a3, a1, 24
+; RV32I-NEXT: sb a3, 3(a2)
 ; RV32I-NEXT: srli a1, a1, 8
 ; RV32I-NEXT: sb a1, 1(a2)
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: sb a1, 6(a2)
+; RV32I-NEXT: srli a1, a0, 24
+; RV32I-NEXT: sb a1, 7(a2)
+; RV32I-NEXT: srli a0, a0, 8
+; RV32I-NEXT: sb a0, 5(a2)
 ; RV32I-NEXT: ret
 %src = load i64, ptr %src.ptr, align 1
 %bitOff = load i64, ptr %bitOff.ptr, align 1
@@ -670,11 +668,9 @@
 ; RV64I-NEXT: sll a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB6_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 8(a2)
-; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a3, a1, 56
 ; RV64I-NEXT: sb a3, 15(a2)
 ; RV64I-NEXT: srli a3, a1, 48
@@ -689,6 +685,7 @@
 ; RV64I-NEXT: sb a3, 10(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 9(a2)
+; RV64I-NEXT: sb a0, 0(a2)
 ; RV64I-NEXT: srli a1, a0, 56
 ; RV64I-NEXT: sb a1, 7(a2)
 ; RV64I-NEXT: srli a1, a0, 48
@@ -954,39 +951,38 @@
 ; RV64I-NEXT: srl a3, a3, a5
 ; RV64I-NEXT: or a0, a0, a3
 ; RV64I-NEXT: .LBB7_3:
-; RV64I-NEXT: slti a3, a4, 0
-; RV64I-NEXT: neg a3, a3
-; RV64I-NEXT: and a1, a3, a1
-; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srai a4, a4, 63
+; RV64I-NEXT: and a1, a4, a1
 ; RV64I-NEXT: sb a1, 0(a2)
-; RV64I-NEXT: srli a3, a0, 56
-; RV64I-NEXT: sb a3, 15(a2)
-; RV64I-NEXT: srli a3, a0, 48
-; RV64I-NEXT: sb a3, 14(a2)
-; RV64I-NEXT: srli a3, a0, 40
-; RV64I-NEXT: sb a3, 13(a2)
-; RV64I-NEXT: srli a3, a0, 32
-; RV64I-NEXT: sb a3, 12(a2)
-; RV64I-NEXT: srli a3, a0, 24
-; RV64I-NEXT: sb a3, 11(a2)
-; RV64I-NEXT: srli a3, a0, 16
-; RV64I-NEXT: sb a3, 10(a2)
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: sb a0, 9(a2)
-; RV64I-NEXT: srli a0, a1, 56
-; RV64I-NEXT: sb a0, 7(a2)
-; RV64I-NEXT: srli a0, a1, 48
-; RV64I-NEXT: sb a0, 6(a2)
-; RV64I-NEXT: srli a0, a1, 40
-; RV64I-NEXT: sb a0, 5(a2)
-; RV64I-NEXT: srli a0, a1, 32
-; RV64I-NEXT: sb a0, 4(a2)
-; RV64I-NEXT: srli a0, a1, 24
-; RV64I-NEXT: sb a0, 3(a2)
-; RV64I-NEXT: srli a0, a1, 16
-; RV64I-NEXT: sb a0, 2(a2)
+; RV64I-NEXT: sb a0, 8(a2)
+; RV64I-NEXT: srli a3, a1, 56
+; RV64I-NEXT: sb a3, 7(a2)
+; RV64I-NEXT: srli a3, a1, 48
+; RV64I-NEXT: sb a3, 6(a2)
+; RV64I-NEXT: srli a3, a1, 40
+; RV64I-NEXT: sb a3, 5(a2)
+; RV64I-NEXT: srli a3, a1, 32
+; RV64I-NEXT: sb a3, 4(a2)
+; RV64I-NEXT: srli a3, a1, 24
+; RV64I-NEXT: sb a3, 3(a2)
+; RV64I-NEXT: srli a3, a1, 16
+; RV64I-NEXT: sb a3, 2(a2)
 ; RV64I-NEXT: srli a1, a1, 8
 ; RV64I-NEXT: sb a1, 1(a2)
+; RV64I-NEXT: srli a1, a0, 56
+; RV64I-NEXT: sb a1, 15(a2)
+; RV64I-NEXT: srli a1, a0, 48
+; RV64I-NEXT: sb a1, 14(a2)
+; RV64I-NEXT: srli a1, a0, 40
+; RV64I-NEXT: sb a1, 13(a2)
+; RV64I-NEXT: srli a1, a0, 32
+; RV64I-NEXT: sb a1, 12(a2)
+; RV64I-NEXT: srli a1, a0, 24
+; RV64I-NEXT: sb a1, 11(a2)
+; RV64I-NEXT: srli a1, a0, 16
+; RV64I-NEXT: sb a1, 10(a2)
+; RV64I-NEXT: srli a0, a0, 8
+; RV64I-NEXT: sb a0, 9(a2)
 ; RV64I-NEXT: ret
 ;
 ; RV32I-LABEL: shl_16bytes: