diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3899,16 +3899,28 @@ SDValue C) const { // Check integral scalar types. if (VT.isScalarInteger()) { - // Do not perform the transformation on riscv32 with the M extension. - if (!Subtarget.is64Bit() && Subtarget.hasStdExtM()) + // Omit the optimization if the subtarget has the M extension and the data + // size exceeds XLen. + if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen()) return false; if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) { - if (ConstNode->getAPIntValue().getBitWidth() > 8 * sizeof(int64_t)) + // Break the MUL into an SLLI and an ADD/SUB. + const APInt &Imm = ConstNode->getAPIntValue(); + if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() || + (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2()) + return true; + // Omit the following optimization if the subtarget has the M extension + // and the data size >= XLen. + if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen()) return false; - int64_t Imm = ConstNode->getSExtValue(); - if (isPowerOf2_64(Imm + 1) || isPowerOf2_64(Imm - 1) || - isPowerOf2_64(1 - Imm) || isPowerOf2_64(-1 - Imm)) + // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm needs + // a pair of LUI/ADDI. 
+ if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) { + APInt ImmS = Imm.ashr(Imm.countTrailingZeros()); + if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() || + (1 - ImmS).isPowerOf2()) return true; + } } } diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -85,8 +85,8 @@ ; ; RV32IM-LABEL: mul_constant: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi a1, zero, 5 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 2 +; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: mul_constant: @@ -182,10 +182,12 @@ ; RV32IM-LABEL: mul64_constant: ; RV32IM: # %bb.0: ; RV32IM-NEXT: addi a2, zero, 5 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: slli a3, a1, 2 ; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: slli a2, a0, 2 +; RV32IM-NEXT: add a0, a2, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: mul64_constant: @@ -300,8 +302,8 @@ ; ; RV32IM-LABEL: muli32_p65: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi a1, zero, 65 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 6 +; RV32IM-NEXT: add a0, a1, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p65: @@ -328,8 +330,8 @@ ; ; RV32IM-LABEL: muli32_p63: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi a1, zero, 63 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 6 +; RV32IM-NEXT: sub a0, a1, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_p63: @@ -364,10 +366,12 @@ ; RV32IM-LABEL: muli64_p65: ; RV32IM: # %bb.0: ; RV32IM-NEXT: addi a2, zero, 65 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: slli a3, a1, 6 ; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: slli a2, a0, 6 +; RV32IM-NEXT: add a0, a2, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_p65: @@ -401,10 +405,12 @@ 
; RV32IM-LABEL: muli64_p63: ; RV32IM: # %bb.0: ; RV32IM-NEXT: addi a2, zero, 63 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: slli a3, a1, 6 +; RV32IM-NEXT: sub a1, a3, a1 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: slli a2, a0, 6 +; RV32IM-NEXT: sub a0, a2, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_p63: @@ -431,8 +437,8 @@ ; ; RV32IM-LABEL: muli32_m63: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi a1, zero, -63 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 6 +; RV32IM-NEXT: sub a0, a0, a1 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_m63: @@ -460,8 +466,9 @@ ; ; RV32IM-LABEL: muli32_m65: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi a1, zero, -65 -; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: slli a1, a0, 6 +; RV32IM-NEXT: add a0, a1, a0 +; RV32IM-NEXT: neg a0, a0 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli32_m65: @@ -496,12 +503,14 @@ ; ; RV32IM-LABEL: muli64_m63: ; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a2, a1, 6 +; RV32IM-NEXT: sub a1, a1, a2 ; RV32IM-NEXT: addi a2, zero, -63 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: sub a3, a3, a0 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: sub a2, a2, a0 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: slli a2, a0, 6 +; RV32IM-NEXT: sub a0, a0, a2 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_m63: @@ -538,12 +547,15 @@ ; ; RV32IM-LABEL: muli64_m65: ; RV32IM: # %bb.0: +; RV32IM-NEXT: slli a2, a1, 6 +; RV32IM-NEXT: add a1, a2, a1 ; RV32IM-NEXT: addi a2, zero, -65 -; RV32IM-NEXT: mul a1, a1, a2 -; RV32IM-NEXT: mulhu a3, a0, a2 -; RV32IM-NEXT: sub a3, a3, a0 -; RV32IM-NEXT: add a1, a3, a1 -; RV32IM-NEXT: mul a0, a0, a2 +; RV32IM-NEXT: mulhu a2, a0, a2 +; RV32IM-NEXT: sub a2, a2, a0 +; RV32IM-NEXT: sub a1, a2, a1 +; RV32IM-NEXT: slli a2, a0, 6 +; RV32IM-NEXT: add a0, a2, a0 +; RV32IM-NEXT: neg a0, a0 ; 
RV32IM-NEXT: ret ; ; RV64I-LABEL: muli64_m65: @@ -638,13 +650,9 @@ define i32 @muli32_p4352(i32 %a) nounwind { ; RV32I-LABEL: muli32_p4352: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1 -; RV32I-NEXT: addi a1, a1, 256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p4352: @@ -656,20 +664,16 @@ ; ; RV64I-LABEL: muli32_p4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: addw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p4352: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1 -; RV64IM-NEXT: addiw a1, a1, 256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: addw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 4352 ret i32 %1 @@ -678,13 +682,9 @@ define i32 @muli32_p3840(i32 %a) nounwind { ; RV32I-LABEL: muli32_p3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1 -; RV32I-NEXT: addi a1, a1, -256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_p3840: @@ -696,20 +696,16 @@ ; ; RV64I-LABEL: muli32_p3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, 
-256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_p3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1 -; RV64IM-NEXT: addiw a1, a1, -256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 8 +; RV64IM-NEXT: slli a0, a0, 12 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, 3840 ret i32 %1 @@ -718,13 +714,9 @@ define i32 @muli32_m3840(i32 %a) nounwind { ; RV32I-LABEL: muli32_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a1, 1048575 -; RV32I-NEXT: addi a1, a1, 256 -; RV32I-NEXT: call __mulsi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: slli a1, a0, 12 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: sub a0, a0, a1 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli32_m3840: @@ -736,20 +728,16 @@ ; ; RV64I-LABEL: muli32_m3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: subw a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli32_m3840: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a1, 1048575 -; RV64IM-NEXT: addiw a1, a1, 256 -; RV64IM-NEXT: mul a0, a0, a1 +; RV64IM-NEXT: slli a1, a0, 12 +; RV64IM-NEXT: slli a0, a0, 8 +; RV64IM-NEXT: subw a0, a0, a1 ; RV64IM-NEXT: ret %1 = mul i32 %a, -3840 ret i32 %1 @@ -798,14 +786,18 @@ define i64 @muli64_p4352(i64 %a) nounwind { ; RV32I-LABEL: muli64_p4352: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1 -; 
RV32I-NEXT: addi a2, a2, 256 -; RV32I-NEXT: mv a3, zero -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 20 +; RV32I-NEXT: slli a1, a1, 12 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a3, a0, 12 +; RV32I-NEXT: add a0, a3, a2 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_p4352: @@ -820,13 +812,9 @@ ; ; RV64I-LABEL: muli64_p4352: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_p4352: @@ -842,14 +830,18 @@ define i64 @muli64_p3840(i64 %a) nounwind { ; RV32I-LABEL: muli64_p3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: mv a3, zero -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 24 +; RV32I-NEXT: slli a3, a1, 8 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 20 +; RV32I-NEXT: slli a1, a1, 12 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 8 +; RV32I-NEXT: slli a0, a0, 12 +; RV32I-NEXT: sltu a3, a0, a2 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_p3840: @@ -864,13 +856,9 @@ ; ; RV64I-LABEL: muli64_p3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte 
Folded Spill -; RV64I-NEXT: lui a1, 1 -; RV64I-NEXT: addiw a1, a1, -256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 8 +; RV64I-NEXT: slli a0, a0, 12 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_p3840: @@ -931,14 +919,18 @@ define i64 @muli64_m3840(i64 %a) nounwind { ; RV32I-LABEL: muli64_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lui a2, 1048575 -; RV32I-NEXT: addi a2, a2, 256 -; RV32I-NEXT: addi a3, zero, -1 -; RV32I-NEXT: call __muldi3@plt -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: srli a2, a0, 20 +; RV32I-NEXT: slli a3, a1, 12 +; RV32I-NEXT: or a2, a3, a2 +; RV32I-NEXT: srli a3, a0, 24 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 12 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: sltu a3, a0, a2 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli64_m3840: @@ -954,13 +946,9 @@ ; ; RV64I-LABEL: muli64_m3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a1, 1048575 -; RV64I-NEXT: addiw a1, a1, 256 -; RV64I-NEXT: call __muldi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a1, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli64_m3840: @@ -976,40 +964,50 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-LABEL: muli128_m3840: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -64 -; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a2, 4(a1) -; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: 
lw a4, 0(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: addi a0, zero, -1 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: lui a0, 1048575 -; RV32I-NEXT: addi a0, a0, 256 -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: sw a1, 36(sp) -; RV32I-NEXT: sw a4, 32(sp) -; RV32I-NEXT: sw a2, 28(sp) -; RV32I-NEXT: addi a0, sp, 40 -; RV32I-NEXT: addi a1, sp, 24 -; RV32I-NEXT: addi a2, sp, 8 -; RV32I-NEXT: sw a3, 24(sp) -; RV32I-NEXT: call __multi3@plt -; RV32I-NEXT: lw a0, 52(sp) -; RV32I-NEXT: lw a1, 48(sp) -; RV32I-NEXT: lw a2, 44(sp) -; RV32I-NEXT: lw a3, 40(sp) -; RV32I-NEXT: sw a0, 12(s0) -; RV32I-NEXT: sw a1, 8(s0) -; RV32I-NEXT: sw a2, 4(s0) -; RV32I-NEXT: sw a3, 0(s0) -; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: srli a6, a2, 20 +; RV32I-NEXT: slli a5, a3, 12 +; RV32I-NEXT: or a6, a5, a6 +; RV32I-NEXT: srli a7, a2, 24 +; RV32I-NEXT: slli a5, a3, 8 +; RV32I-NEXT: or a7, a5, a7 +; RV32I-NEXT: sltu t0, a7, a6 +; RV32I-NEXT: srli t1, a3, 20 +; RV32I-NEXT: slli a5, a1, 12 +; RV32I-NEXT: or a5, a5, t1 +; RV32I-NEXT: srli a3, a3, 24 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: sub t2, a1, a5 +; RV32I-NEXT: srli a1, a4, 20 +; RV32I-NEXT: slli a3, a2, 12 +; RV32I-NEXT: or a3, a3, a1 +; RV32I-NEXT: srli a1, a4, 24 +; RV32I-NEXT: slli a2, a2, 8 +; RV32I-NEXT: or a5, a2, a1 +; RV32I-NEXT: slli t1, a4, 12 +; RV32I-NEXT: slli t3, a4, 8 +; RV32I-NEXT: sltu t4, t3, t1 +; RV32I-NEXT: sub t0, t2, t0 +; RV32I-NEXT: mv a2, t4 +; RV32I-NEXT: beq a5, a3, .LBB26_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu a2, a5, a3 +; RV32I-NEXT: .LBB26_2: +; RV32I-NEXT: sub a1, a7, a6 +; RV32I-NEXT: sltu a4, a1, a2 +; RV32I-NEXT: sub a4, t0, a4 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: sub a2, a5, a3 +; RV32I-NEXT: sub a2, a2, t4 +; RV32I-NEXT: sub a3, t3, t1 +; RV32I-NEXT: sw a3, 0(a0) +; 
RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m3840: @@ -1052,14 +1050,18 @@ ; ; RV64I-LABEL: muli128_m3840: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lui a2, 1048575 -; RV64I-NEXT: addiw a2, a2, 256 -; RV64I-NEXT: addi a3, zero, -1 -; RV64I-NEXT: call __multi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: srli a2, a0, 52 +; RV64I-NEXT: slli a3, a1, 12 +; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: sub a1, a1, a2 +; RV64I-NEXT: slli a2, a0, 12 +; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: sltu a3, a0, a2 +; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli128_m3840: @@ -1079,39 +1081,40 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-LABEL: muli128_m63: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -64 -; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a2, 4(a1) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw t0, 12(a1) ; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: addi a0, zero, -1 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: addi a0, zero, -63 -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: sw a1, 36(sp) -; RV32I-NEXT: sw a4, 32(sp) -; RV32I-NEXT: sw a2, 28(sp) -; RV32I-NEXT: addi a0, sp, 40 -; RV32I-NEXT: addi a1, sp, 24 -; RV32I-NEXT: addi a2, sp, 8 -; RV32I-NEXT: sw a3, 24(sp) -; RV32I-NEXT: call __multi3@plt -; RV32I-NEXT: lw a0, 52(sp) -; RV32I-NEXT: lw a1, 48(sp) -; RV32I-NEXT: lw a2, 44(sp) -; RV32I-NEXT: lw a3, 40(sp) -; RV32I-NEXT: sw a0, 12(s0) -; RV32I-NEXT: sw a1, 8(s0) -; RV32I-NEXT: sw a2, 4(s0) -; RV32I-NEXT: sw 
a3, 0(s0) -; RV32I-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: slli a6, a2, 6 +; RV32I-NEXT: sltu a7, a2, a6 +; RV32I-NEXT: srli a1, a2, 26 +; RV32I-NEXT: slli a5, a3, 6 +; RV32I-NEXT: or t2, a5, a1 +; RV32I-NEXT: mv t3, a7 +; RV32I-NEXT: beq a3, t2, .LBB27_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: sltu t3, a3, t2 +; RV32I-NEXT: .LBB27_2: +; RV32I-NEXT: srli t1, a3, 26 +; RV32I-NEXT: slli a1, a4, 6 +; RV32I-NEXT: or a1, a1, t1 +; RV32I-NEXT: sub a5, a4, a1 +; RV32I-NEXT: sltu t1, a5, t3 +; RV32I-NEXT: sltu t4, a4, a1 +; RV32I-NEXT: srli a4, a4, 26 +; RV32I-NEXT: slli a1, t0, 6 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: sub a1, t0, a1 +; RV32I-NEXT: sub a1, a1, t4 +; RV32I-NEXT: sub a1, a1, t1 +; RV32I-NEXT: sub a4, a5, t3 +; RV32I-NEXT: sub a3, a3, t2 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a2, a2, a6 +; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m63: @@ -1153,13 +1156,14 @@ ; ; RV64I-LABEL: muli128_m63: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a2, zero, -63 -; RV64I-NEXT: addi a3, zero, -1 -; RV64I-NEXT: call __multi3@plt -; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: slli a2, a0, 6 +; RV64I-NEXT: sltu a3, a0, a2 +; RV64I-NEXT: srli a4, a0, 58 +; RV64I-NEXT: slli a5, a1, 6 +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: sub a1, a1, a3 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV64IM-LABEL: muli128_m63: