diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -199,6 +199,9 @@
     setOperationAction(ISD::USUBO, MVT::i32, Custom);
     setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
     setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
+  } else {
+    setLibcallName(RTLIB::MUL_I128, nullptr);
+    setLibcallName(RTLIB::MULO_I64, nullptr);
   }
 
   if (!Subtarget.hasStdExtM()) {
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1189,40 +1189,58 @@
 ;
 ; RV32IM-LABEL: muli128_m3840:
 ; RV32IM: # %bb.0:
-; RV32IM-NEXT: addi sp, sp, -64
-; RV32IM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill
-; RV32IM-NEXT: lw a3, 0(a1)
-; RV32IM-NEXT: lw a2, 4(a1)
-; RV32IM-NEXT: lw a4, 8(a1)
-; RV32IM-NEXT: lw a1, 12(a1)
-; RV32IM-NEXT: mv s0, a0
-; RV32IM-NEXT: addi a0, zero, -1
-; RV32IM-NEXT: sw a0, 20(sp)
-; RV32IM-NEXT: sw a0, 16(sp)
-; RV32IM-NEXT: sw a0, 12(sp)
-; RV32IM-NEXT: lui a0, 1048575
-; RV32IM-NEXT: addi a0, a0, 256
-; RV32IM-NEXT: sw a0, 8(sp)
-; RV32IM-NEXT: sw a1, 36(sp)
-; RV32IM-NEXT: sw a4, 32(sp)
-; RV32IM-NEXT: sw a2, 28(sp)
-; RV32IM-NEXT: addi a0, sp, 40
-; RV32IM-NEXT: addi a1, sp, 24
-; RV32IM-NEXT: addi a2, sp, 8
-; RV32IM-NEXT: sw a3, 24(sp)
-; RV32IM-NEXT: call __multi3@plt
-; RV32IM-NEXT: lw a0, 52(sp)
-; RV32IM-NEXT: lw a1, 48(sp)
-; RV32IM-NEXT: lw a2, 44(sp)
-; RV32IM-NEXT: lw a3, 40(sp)
-; RV32IM-NEXT: sw a0, 12(s0)
-; RV32IM-NEXT: sw a1, 8(s0)
-; RV32IM-NEXT: sw a2, 4(s0)
-; RV32IM-NEXT: sw a3, 0(s0)
-; RV32IM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 64
+; RV32IM-NEXT: addi sp, sp, -16
+; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
+; RV32IM-NEXT: lw a6, 12(a1)
+; RV32IM-NEXT: lw t0, 8(a1)
+; RV32IM-NEXT: lw a4, 0(a1)
+; RV32IM-NEXT: lw a1, 4(a1)
+; RV32IM-NEXT: lui a5, 1048575
+; RV32IM-NEXT: addi a7, a5, 256
+; RV32IM-NEXT: mulhu a2, a4, a7
+; RV32IM-NEXT: mul a5, a1, a7
+; RV32IM-NEXT: add a2, a5, a2
+; RV32IM-NEXT: sltu a5, a2, a5
+; RV32IM-NEXT: mulhu a3, a1, a7
+; RV32IM-NEXT: add t5, a3, a5
+; RV32IM-NEXT: sub t1, a2, a4
+; RV32IM-NEXT: neg t4, a4
+; RV32IM-NEXT: sltu a2, t1, t4
+; RV32IM-NEXT: addi t2, zero, -1
+; RV32IM-NEXT: mulhu t3, a4, t2
+; RV32IM-NEXT: add a2, t3, a2
+; RV32IM-NEXT: add a2, t5, a2
+; RV32IM-NEXT: sub a5, a2, a1
+; RV32IM-NEXT: mul a3, t0, a7
+; RV32IM-NEXT: sub a3, a3, a4
+; RV32IM-NEXT: add t6, a5, a3
+; RV32IM-NEXT: sltu s0, t6, a5
+; RV32IM-NEXT: neg s1, a1
+; RV32IM-NEXT: sltu a5, a5, s1
+; RV32IM-NEXT: sltu a2, a2, t5
+; RV32IM-NEXT: mulhu s1, a1, t2
+; RV32IM-NEXT: add a2, s1, a2
+; RV32IM-NEXT: add a2, a2, a5
+; RV32IM-NEXT: sltu a3, a3, t4
+; RV32IM-NEXT: mul a5, a6, a7
+; RV32IM-NEXT: mulhu s1, t0, a7
+; RV32IM-NEXT: sub s1, s1, t0
+; RV32IM-NEXT: add a5, s1, a5
+; RV32IM-NEXT: sub s1, t3, a4
+; RV32IM-NEXT: sub a1, s1, a1
+; RV32IM-NEXT: add a1, a1, a5
+; RV32IM-NEXT: add a1, a1, a3
+; RV32IM-NEXT: add a1, a2, a1
+; RV32IM-NEXT: add a1, a1, s0
+; RV32IM-NEXT: mul a2, a4, a7
+; RV32IM-NEXT: sw a2, 0(a0)
+; RV32IM-NEXT: sw t1, 4(a0)
+; RV32IM-NEXT: sw t6, 8(a0)
+; RV32IM-NEXT: sw a1, 12(a0)
+; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT: addi sp, sp, 
16 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli128_m3840: @@ -1296,39 +1314,63 @@ ; ; RV32IM-LABEL: muli128_m63: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -64 -; RV32IM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill +; RV32IM-NEXT: addi sp, sp, -16 +; RV32IM-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32IM-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: lw a7, 12(a1) ; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a2, 4(a1) -; RV32IM-NEXT: lw a4, 8(a1) -; RV32IM-NEXT: lw a1, 12(a1) -; RV32IM-NEXT: mv s0, a0 -; RV32IM-NEXT: addi a0, zero, -1 -; RV32IM-NEXT: sw a0, 20(sp) -; RV32IM-NEXT: sw a0, 16(sp) -; RV32IM-NEXT: sw a0, 12(sp) -; RV32IM-NEXT: addi a0, zero, -63 -; RV32IM-NEXT: sw a0, 8(sp) -; RV32IM-NEXT: sw a1, 36(sp) -; RV32IM-NEXT: sw a4, 32(sp) -; RV32IM-NEXT: sw a2, 28(sp) -; RV32IM-NEXT: addi a0, sp, 40 -; RV32IM-NEXT: addi a1, sp, 24 -; RV32IM-NEXT: addi a2, sp, 8 -; RV32IM-NEXT: sw a3, 24(sp) -; RV32IM-NEXT: call __multi3@plt -; RV32IM-NEXT: lw a0, 52(sp) -; RV32IM-NEXT: lw a1, 48(sp) -; RV32IM-NEXT: lw a2, 44(sp) -; RV32IM-NEXT: lw a3, 40(sp) -; RV32IM-NEXT: sw a0, 12(s0) -; RV32IM-NEXT: sw a1, 8(s0) -; RV32IM-NEXT: sw a2, 4(s0) -; RV32IM-NEXT: sw a3, 0(s0) -; RV32IM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32IM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32IM-NEXT: addi sp, sp, 64 +; RV32IM-NEXT: lw a4, 4(a1) +; RV32IM-NEXT: lw t5, 8(a1) +; RV32IM-NEXT: addi a6, zero, -63 +; RV32IM-NEXT: mulhu a5, a3, a6 +; RV32IM-NEXT: slli a2, a4, 6 +; RV32IM-NEXT: sub a2, a2, a4 +; RV32IM-NEXT: sub a5, a5, a2 +; RV32IM-NEXT: neg a2, a2 +; RV32IM-NEXT: sltu t0, a5, a2 +; RV32IM-NEXT: mulhu a2, a4, a6 +; RV32IM-NEXT: add t4, a2, t0 +; RV32IM-NEXT: sub t0, a5, a3 +; RV32IM-NEXT: neg t1, a3 +; RV32IM-NEXT: sltu a5, t0, t1 +; RV32IM-NEXT: addi t2, zero, -1 +; RV32IM-NEXT: mulhu t3, a3, t2 +; RV32IM-NEXT: add a5, t3, a5 +; RV32IM-NEXT: add a5, t4, a5 +; RV32IM-NEXT: sub a2, a5, a4 +; RV32IM-NEXT: slli a1, t5, 6 +; RV32IM-NEXT: sub a1, a1, t5 +; RV32IM-NEXT: add a1, a1, a3 +; RV32IM-NEXT: sub t6, a2, a1 +; RV32IM-NEXT: sltu s0, t6, a2 +; RV32IM-NEXT: neg s1, a4 +; RV32IM-NEXT: sltu a2, a2, s1 +; RV32IM-NEXT: sltu a5, a5, t4 +; RV32IM-NEXT: mulhu s1, a4, t2 +; RV32IM-NEXT: add a5, s1, a5 +; RV32IM-NEXT: add a2, a5, a2 +; RV32IM-NEXT: slli a5, a7, 6 +; RV32IM-NEXT: sub a5, a7, a5 +; RV32IM-NEXT: mulhu s1, t5, a6 +; RV32IM-NEXT: sub s1, s1, t5 +; RV32IM-NEXT: add a5, s1, a5 +; RV32IM-NEXT: sub s1, t3, a3 +; RV32IM-NEXT: sub a4, s1, a4 +; RV32IM-NEXT: add a4, a4, a5 +; RV32IM-NEXT: neg a1, a1 +; RV32IM-NEXT: sltu a1, a1, t1 +; RV32IM-NEXT: add a1, a4, a1 +; RV32IM-NEXT: add a1, a2, a1 +; RV32IM-NEXT: add a1, a1, s0 +; RV32IM-NEXT: slli a2, a3, 6 +; RV32IM-NEXT: sub a2, a3, a2 +; RV32IM-NEXT: sw a2, 0(a0) +; RV32IM-NEXT: sw t0, 4(a0) +; RV32IM-NEXT: sw t6, 8(a0) +; RV32IM-NEXT: sw a1, 12(a0) +; RV32IM-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32IM-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32IM-NEXT: addi sp, sp, 16 ; RV32IM-NEXT: ret ; ; RV64I-LABEL: muli128_m63: @@ -1361,48 +1403,123 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: mulhsu_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -64 -; RV32I-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-NEXT: srai a4, a3, 31 -; RV32I-NEXT: sw a3, 12(sp) -; RV32I-NEXT: sw a2, 8(sp) -; RV32I-NEXT: sw zero, 36(sp) -; RV32I-NEXT: sw zero, 32(sp) -; RV32I-NEXT: sw a1, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: sw a4, 20(sp) -; RV32I-NEXT: addi a0, sp, 
40 -; RV32I-NEXT: addi a1, sp, 24 -; RV32I-NEXT: addi a2, sp, 8 -; RV32I-NEXT: sw a4, 16(sp) -; RV32I-NEXT: call __multi3@plt -; RV32I-NEXT: lw a0, 48(sp) -; RV32I-NEXT: lw a1, 52(sp) -; RV32I-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 20(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s9, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a3 +; RV32I-NEXT: mv s5, a2 +; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: srai s4, a3, 31 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __muldi3@plt +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: mv a2, s5 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __muldi3@plt +; RV32I-NEXT: add s1, a0, s1 +; RV32I-NEXT: sltu a0, s1, a0 +; RV32I-NEXT: add s7, a1, a0 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __muldi3@plt +; RV32I-NEXT: add a2, a0, s1 +; RV32I-NEXT: sltu a0, a2, a0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add s8, s7, a0 +; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a1, zero +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __muldi3@plt +; RV32I-NEXT: mv s9, a0 +; RV32I-NEXT: mv s6, a1 +; RV32I-NEXT: add s1, a0, s8 +; RV32I-NEXT: mv a0, s5 +; RV32I-NEXT: mv a1, s0 +; RV32I-NEXT: mv a2, zero +; RV32I-NEXT: mv a3, zero +; RV32I-NEXT: call __muldi3@plt +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s5, a1 +; RV32I-NEXT: mv a0, s4 +; RV32I-NEXT: mv a1, s4 +; RV32I-NEXT: mv a2, s3 +; RV32I-NEXT: mv a3, s2 +; RV32I-NEXT: call __muldi3@plt +; RV32I-NEXT: add a3, a0, s0 +; RV32I-NEXT: add a2, s1, a3 +; RV32I-NEXT: sltu a4, a2, s1 +; RV32I-NEXT: sltu a5, s1, s9 +; RV32I-NEXT: sltu s1, s8, s7 +; RV32I-NEXT: add s1, s6, s1 +; RV32I-NEXT: add a5, s1, a5 +; RV32I-NEXT: add a1, a1, s5 +; RV32I-NEXT: sltu a0, a3, a0 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, a5, a0 +; RV32I-NEXT: add a1, a0, a4 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lw s9, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s8, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s7, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s6, 16(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 24(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 28(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s2, 32(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV32IM-LABEL: mulhsu_i64: ; RV32IM: # %bb.0: -; RV32IM-NEXT: addi sp, sp, -64 -; RV32IM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32IM-NEXT: srai a4, a3, 31 -; RV32IM-NEXT: sw a3, 12(sp) -; RV32IM-NEXT: sw a2, 8(sp) -; RV32IM-NEXT: sw zero, 36(sp) -; RV32IM-NEXT: sw zero, 32(sp) -; RV32IM-NEXT: sw a1, 28(sp) -; RV32IM-NEXT: sw a0, 24(sp) -; RV32IM-NEXT: sw a4, 20(sp) -; RV32IM-NEXT: addi a0, sp, 40 -; 
RV32IM-NEXT: addi a1, sp, 24
-; RV32IM-NEXT: addi a2, sp, 8
-; RV32IM-NEXT: sw a4, 16(sp)
-; RV32IM-NEXT: call __multi3@plt
-; RV32IM-NEXT: lw a0, 48(sp)
-; RV32IM-NEXT: lw a1, 52(sp)
-; RV32IM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload
-; RV32IM-NEXT: addi sp, sp, 64
+; RV32IM-NEXT: srai a7, a3, 31
+; RV32IM-NEXT: mulhu a6, a0, a2
+; RV32IM-NEXT: mul a5, a1, a2
+; RV32IM-NEXT: add a4, a5, a6
+; RV32IM-NEXT: sltu a5, a4, a5
+; RV32IM-NEXT: mulhu a2, a1, a2
+; RV32IM-NEXT: add a6, a2, a5
+; RV32IM-NEXT: mul a2, a0, a3
+; RV32IM-NEXT: add a4, a2, a4
+; RV32IM-NEXT: sltu a2, a4, a2
+; RV32IM-NEXT: mulhu a4, a0, a3
+; RV32IM-NEXT: add a2, a4, a2
+; RV32IM-NEXT: add a4, a6, a2
+; RV32IM-NEXT: mul a5, a1, a3
+; RV32IM-NEXT: add a2, a5, a4
+; RV32IM-NEXT: mul t1, a7, a0
+; RV32IM-NEXT: add t0, a2, t1
+; RV32IM-NEXT: sltu t2, t0, a2
+; RV32IM-NEXT: sltu a2, a2, a5
+; RV32IM-NEXT: sltu a4, a4, a6
+; RV32IM-NEXT: mulhu a3, a1, a3
+; RV32IM-NEXT: add a3, a3, a4
+; RV32IM-NEXT: add a2, a3, a2
+; RV32IM-NEXT: mul a1, a7, a1
+; RV32IM-NEXT: mulhu a0, a7, a0
+; RV32IM-NEXT: add a0, a0, a1
+; RV32IM-NEXT: add a0, a0, t1
+; RV32IM-NEXT: add a0, a2, a0
+; RV32IM-NEXT: add a1, a0, t2
+; RV32IM-NEXT: mv a0, t0
 ; RV32IM-NEXT: ret
 ;
 ; RV64I-LABEL: mulhsu_i64:
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsic-optimizations.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsic-optimizations.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsic-optimizations.ll
@@ -0,0 +1,20 @@
+; RUN: llc %s -mtriple=riscv32 -o - | FileCheck %s
+
+define i1 @no__mulodi4(i32 %a, i64 %b, i32* %c) {
+; CHECK-LABEL: no__mulodi4
+; CHECK-NOT: call __mulodi4@plt
+; CHECK-NOT: call __multi3@plt
+entry:
+  %0 = sext i32 %a to i64
+  %1 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %0, i64 %b)
+  %2 = extractvalue { i64, i1 } %1, 1
+  %3 = extractvalue { i64, i1 } %1, 0
+  %4 = trunc i64 %3 to i32
+  %5 = sext i32 %4 to i64
+  %6 = icmp ne i64 %3, %5
+  %7 = or i1 %2, %6
+  store i32 %4, i32* %c, align 4
+  ret i1 %7
+}
+
+declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
--- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
@@ -4,113 +4,110 @@
 define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
 ; RISCV32-LABEL: muloti_test:
 ; RISCV32: # %bb.0: # %start
-; RISCV32-NEXT: addi sp, sp, -96
-; RISCV32-NEXT: sw ra, 92(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s0, 88(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s1, 84(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s2, 80(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s3, 76(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s4, 72(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s5, 68(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s6, 64(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s7, 60(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: sw s8, 56(sp) # 4-byte Folded Spill
-; RISCV32-NEXT: lw s2, 12(a1)
-; RISCV32-NEXT: lw s6, 8(a1)
-; RISCV32-NEXT: lw s3, 12(a2)
-; RISCV32-NEXT: lw s7, 8(a2)
-; RISCV32-NEXT: lw s0, 0(a1)
-; RISCV32-NEXT: lw s8, 4(a1)
-; RISCV32-NEXT: lw s1, 0(a2)
-; RISCV32-NEXT: lw s5, 4(a2)
-; RISCV32-NEXT: mv s4, a0
-; RISCV32-NEXT: sw zero, 20(sp)
-; RISCV32-NEXT: sw zero, 16(sp)
-; RISCV32-NEXT: sw zero, 36(sp)
-; RISCV32-NEXT: sw zero, 32(sp)
-; RISCV32-NEXT: sw s5, 12(sp)
-; RISCV32-NEXT: sw s1, 8(sp)
-; RISCV32-NEXT: sw s8, 28(sp)
-; 
RISCV32-NEXT: addi a0, sp, 40 -; RISCV32-NEXT: addi a1, sp, 24 -; RISCV32-NEXT: addi a2, sp, 8 -; RISCV32-NEXT: sw s0, 24(sp) -; RISCV32-NEXT: call __multi3@plt -; RISCV32-NEXT: mul a0, s8, s7 -; RISCV32-NEXT: mul a1, s3, s0 -; RISCV32-NEXT: add a0, a1, a0 -; RISCV32-NEXT: mulhu a5, s7, s0 -; RISCV32-NEXT: add a0, a5, a0 -; RISCV32-NEXT: mul a1, s5, s6 -; RISCV32-NEXT: mul a2, s2, s1 -; RISCV32-NEXT: add a1, a2, a1 -; RISCV32-NEXT: mulhu t0, s6, s1 -; RISCV32-NEXT: add t1, t0, a1 -; RISCV32-NEXT: add a6, t1, a0 -; RISCV32-NEXT: mul a1, s7, s0 -; RISCV32-NEXT: mul a3, s6, s1 -; RISCV32-NEXT: add a4, a3, a1 -; RISCV32-NEXT: lw a1, 52(sp) -; RISCV32-NEXT: lw a2, 48(sp) -; RISCV32-NEXT: sltu a3, a4, a3 -; RISCV32-NEXT: add a3, a6, a3 -; RISCV32-NEXT: add a3, a1, a3 -; RISCV32-NEXT: add a6, a2, a4 -; RISCV32-NEXT: sltu a2, a6, a2 -; RISCV32-NEXT: add a7, a3, a2 -; RISCV32-NEXT: beq a7, a1, .LBB0_2 +; RISCV32-NEXT: addi sp, sp, -32 +; RISCV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill +; RISCV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill +; RISCV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill +; RISCV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill +; RISCV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill +; RISCV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill +; RISCV32-NEXT: sw s6, 4(sp) # 4-byte Folded Spill +; RISCV32-NEXT: lw a6, 12(a1) +; RISCV32-NEXT: lw a7, 12(a2) +; RISCV32-NEXT: lw t3, 8(a1) +; RISCV32-NEXT: lw a4, 0(a2) +; RISCV32-NEXT: lw a5, 0(a1) +; RISCV32-NEXT: lw a3, 4(a1) +; RISCV32-NEXT: lw s2, 8(a2) +; RISCV32-NEXT: lw a2, 4(a2) +; RISCV32-NEXT: mulhu a1, a5, a4 +; RISCV32-NEXT: mul s1, a3, a4 +; RISCV32-NEXT: add a1, s1, a1 +; RISCV32-NEXT: sltu s1, a1, s1 +; RISCV32-NEXT: mulhu s0, a3, a4 +; RISCV32-NEXT: add t4, s0, s1 +; RISCV32-NEXT: mul s0, a5, a2 +; RISCV32-NEXT: add t0, s0, a1 +; RISCV32-NEXT: sltu a1, t0, s0 +; RISCV32-NEXT: mulhu s0, a5, a2 +; RISCV32-NEXT: add a1, s0, a1 +; RISCV32-NEXT: add a1, t4, a1 +; RISCV32-NEXT: mul s0, a3, a2 +; RISCV32-NEXT: add s1, s0, a1 +; RISCV32-NEXT: mul t1, s2, a5 +; RISCV32-NEXT: mul s3, t3, a4 +; RISCV32-NEXT: add s4, s3, t1 +; RISCV32-NEXT: add t1, s1, s4 +; RISCV32-NEXT: sltu t2, t1, s1 +; RISCV32-NEXT: sltu s1, s1, s0 +; RISCV32-NEXT: sltu a1, a1, t4 +; RISCV32-NEXT: mulhu s0, a3, a2 +; RISCV32-NEXT: add a1, s0, a1 +; RISCV32-NEXT: add s0, a1, s1 +; RISCV32-NEXT: mul a1, a3, s2 +; RISCV32-NEXT: mul s1, a7, a5 +; RISCV32-NEXT: add a1, s1, a1 +; RISCV32-NEXT: mulhu s5, s2, a5 +; RISCV32-NEXT: add s6, s5, a1 +; RISCV32-NEXT: mul s1, a2, t3 +; RISCV32-NEXT: mul a1, a6, a4 +; RISCV32-NEXT: add a1, a1, s1 +; RISCV32-NEXT: mulhu t5, t3, a4 +; RISCV32-NEXT: add t6, t5, a1 +; RISCV32-NEXT: add a1, t6, s6 +; RISCV32-NEXT: sltu s1, s4, s3 +; RISCV32-NEXT: add a1, a1, s1 +; RISCV32-NEXT: add a1, s0, a1 +; RISCV32-NEXT: add t4, a1, t2 +; RISCV32-NEXT: beq t4, s0, .LBB0_2 ; RISCV32-NEXT: # %bb.1: # %start -; RISCV32-NEXT: sltu a2, a7, a1 +; RISCV32-NEXT: sltu t2, t4, s0 ; RISCV32-NEXT: .LBB0_2: # %start -; RISCV32-NEXT: sltu a0, a0, a5 -; RISCV32-NEXT: snez a1, s8 -; RISCV32-NEXT: snez a3, s3 -; RISCV32-NEXT: and a1, a3, a1 -; RISCV32-NEXT: mulhu a3, s3, s0 +; RISCV32-NEXT: sltu a1, s6, s5 +; RISCV32-NEXT: snez s0, a3 +; RISCV32-NEXT: snez s1, a7 +; RISCV32-NEXT: and s0, s1, s0 +; RISCV32-NEXT: mulhu s1, a7, a5 +; RISCV32-NEXT: snez s1, s1 +; RISCV32-NEXT: or s0, s0, s1 +; RISCV32-NEXT: mulhu a3, a3, s2 ; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or a1, a1, a3 -; RISCV32-NEXT: mulhu a3, s8, s7 -; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or a1, a1, a3 -; 
RISCV32-NEXT: or a0, a1, a0 -; RISCV32-NEXT: sltu a1, t1, t0 -; RISCV32-NEXT: snez a3, s5 -; RISCV32-NEXT: snez a4, s2 -; RISCV32-NEXT: and a3, a4, a3 -; RISCV32-NEXT: mulhu a4, s2, s1 -; RISCV32-NEXT: snez a4, a4 -; RISCV32-NEXT: or a3, a3, a4 -; RISCV32-NEXT: mulhu a4, s5, s6 -; RISCV32-NEXT: snez a4, a4 -; RISCV32-NEXT: or a3, a3, a4 +; RISCV32-NEXT: or a3, s0, a3 ; RISCV32-NEXT: or a1, a3, a1 -; RISCV32-NEXT: or a3, s7, s3 +; RISCV32-NEXT: sltu a3, t6, t5 +; RISCV32-NEXT: snez s1, a2 +; RISCV32-NEXT: snez s0, a6 +; RISCV32-NEXT: and s1, s0, s1 +; RISCV32-NEXT: mulhu s0, a6, a4 +; RISCV32-NEXT: snez s0, s0 +; RISCV32-NEXT: or s1, s1, s0 +; RISCV32-NEXT: mulhu a2, a2, t3 +; RISCV32-NEXT: snez a2, a2 +; RISCV32-NEXT: or a2, s1, a2 +; RISCV32-NEXT: or a2, a2, a3 +; RISCV32-NEXT: or a3, s2, a7 ; RISCV32-NEXT: snez a3, a3 -; RISCV32-NEXT: or a4, s6, s2 -; RISCV32-NEXT: snez a4, a4 -; RISCV32-NEXT: and a3, a4, a3 -; RISCV32-NEXT: or a1, a3, a1 -; RISCV32-NEXT: or a0, a1, a0 -; RISCV32-NEXT: lw a1, 44(sp) -; RISCV32-NEXT: lw a3, 40(sp) -; RISCV32-NEXT: or a0, a0, a2 -; RISCV32-NEXT: andi a0, a0, 1 -; RISCV32-NEXT: sw a1, 4(s4) -; RISCV32-NEXT: sw a3, 0(s4) -; RISCV32-NEXT: sw a6, 8(s4) -; RISCV32-NEXT: sw a7, 12(s4) -; RISCV32-NEXT: sb a0, 16(s4) -; RISCV32-NEXT: lw s8, 56(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s7, 60(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s6, 64(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s5, 68(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s4, 72(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s3, 76(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s2, 80(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s1, 84(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw s0, 88(sp) # 4-byte Folded Reload -; RISCV32-NEXT: lw ra, 92(sp) # 4-byte Folded Reload -; RISCV32-NEXT: addi sp, sp, 96 +; RISCV32-NEXT: or s1, t3, a6 +; RISCV32-NEXT: snez s1, s1 +; RISCV32-NEXT: and a3, s1, a3 +; RISCV32-NEXT: or a2, a3, a2 +; RISCV32-NEXT: or a1, a2, a1 +; RISCV32-NEXT: or a1, a1, t2 +; RISCV32-NEXT: mul a2, a5, a4 +; RISCV32-NEXT: andi a1, a1, 1 +; RISCV32-NEXT: sw a2, 0(a0) +; RISCV32-NEXT: sw t0, 4(a0) +; RISCV32-NEXT: sw t1, 8(a0) +; RISCV32-NEXT: sw t4, 12(a0) +; RISCV32-NEXT: sb a1, 16(a0) +; RISCV32-NEXT: lw s6, 4(sp) # 4-byte Folded Reload +; RISCV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload +; RISCV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload +; RISCV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload +; RISCV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload +; RISCV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload +; RISCV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload +; RISCV32-NEXT: addi sp, sp, 32 ; RISCV32-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -903,21 +903,66 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: mv s0, a4 -; RV32-NEXT: sw zero, 4(sp) -; RV32-NEXT: addi a4, sp, 4 -; RV32-NEXT: call __mulodi4@plt -; RV32-NEXT: lw a2, 4(sp) -; RV32-NEXT: snez a2, a2 -; RV32-NEXT: sw a1, 4(s0) -; RV32-NEXT: sw a0, 0(s0) -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: sw 
s0, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s3, 0(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: .cfi_offset s3, -16 +; RV32-NEXT: mulhu a6, a0, a2 +; RV32-NEXT: mul a5, a1, a2 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: sltu a7, a6, a5 +; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: add a7, a5, a7 +; RV32-NEXT: mul a5, a0, a3 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: sltu t0, a6, a5 +; RV32-NEXT: mulhu a5, a0, a3 +; RV32-NEXT: add a5, a5, t0 +; RV32-NEXT: add t0, a7, a5 +; RV32-NEXT: mul t1, a1, a3 +; RV32-NEXT: add a5, t1, t0 +; RV32-NEXT: srai t2, a1, 31 +; RV32-NEXT: mul t3, a2, t2 +; RV32-NEXT: srai t4, a3, 31 +; RV32-NEXT: mul t5, t4, a0 +; RV32-NEXT: add t6, t5, t3 +; RV32-NEXT: add s3, a5, t6 +; RV32-NEXT: sltu s2, s3, a5 +; RV32-NEXT: sltu a5, a5, t1 +; RV32-NEXT: sltu s1, t0, a7 +; RV32-NEXT: mulhu s0, a1, a3 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: add a5, s1, a5 +; RV32-NEXT: mulhu s1, a2, t2 +; RV32-NEXT: add s1, s1, t3 +; RV32-NEXT: mul a3, a3, t2 +; RV32-NEXT: add a3, s1, a3 +; RV32-NEXT: mul a1, t4, a1 +; RV32-NEXT: mulhu s1, t4, a0 +; RV32-NEXT: add a1, s1, a1 +; RV32-NEXT: add a1, a1, t5 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: sltu a3, t6, t5 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: add a1, a1, s2 +; RV32-NEXT: srai a3, a6, 31 +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: xor a3, s3, a3 +; RV32-NEXT: or a1, a3, a1 +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: sw a0, 0(a4) +; RV32-NEXT: sw a6, 4(a4) +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: lw s3, 0(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -935,21 +980,66 @@ ; RV32ZBA: # %bb.0: # %entry ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 -; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: .cfi_offset ra, -4 -; RV32ZBA-NEXT: .cfi_offset s0, -8 -; RV32ZBA-NEXT: mv s0, a4 -; RV32ZBA-NEXT: sw zero, 4(sp) -; RV32ZBA-NEXT: addi a4, sp, 4 -; RV32ZBA-NEXT: call __mulodi4@plt -; RV32ZBA-NEXT: lw a2, 4(sp) -; RV32ZBA-NEXT: snez a2, a2 -; RV32ZBA-NEXT: sw a1, 4(s0) -; RV32ZBA-NEXT: sw a0, 0(s0) -; RV32ZBA-NEXT: mv a0, a2 -; RV32ZBA-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s3, 0(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset s0, -4 +; RV32ZBA-NEXT: .cfi_offset s1, -8 +; RV32ZBA-NEXT: .cfi_offset s2, -12 +; RV32ZBA-NEXT: .cfi_offset s3, -16 +; RV32ZBA-NEXT: mulhu a6, a0, a2 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: sltu a7, a6, a5 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: add a7, a5, a7 +; RV32ZBA-NEXT: mul a5, a0, a3 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: sltu t0, a6, a5 +; RV32ZBA-NEXT: mulhu a5, a0, a3 +; RV32ZBA-NEXT: add a5, a5, t0 +; RV32ZBA-NEXT: add t0, a7, a5 +; RV32ZBA-NEXT: mul t1, a1, a3 +; RV32ZBA-NEXT: add a5, t1, t0 +; RV32ZBA-NEXT: srai t2, a1, 31 +; RV32ZBA-NEXT: mul t3, a2, t2 +; RV32ZBA-NEXT: srai t4, a3, 31 +; 
RV32ZBA-NEXT: mul t5, t4, a0 +; RV32ZBA-NEXT: add t6, t5, t3 +; RV32ZBA-NEXT: add s3, a5, t6 +; RV32ZBA-NEXT: sltu s2, s3, a5 +; RV32ZBA-NEXT: sltu a5, a5, t1 +; RV32ZBA-NEXT: sltu s1, t0, a7 +; RV32ZBA-NEXT: mulhu s0, a1, a3 +; RV32ZBA-NEXT: add s1, s0, s1 +; RV32ZBA-NEXT: add a5, s1, a5 +; RV32ZBA-NEXT: mulhu s1, a2, t2 +; RV32ZBA-NEXT: add s1, s1, t3 +; RV32ZBA-NEXT: mul a3, a3, t2 +; RV32ZBA-NEXT: add a3, s1, a3 +; RV32ZBA-NEXT: mul a1, t4, a1 +; RV32ZBA-NEXT: mulhu s1, t4, a0 +; RV32ZBA-NEXT: add a1, s1, a1 +; RV32ZBA-NEXT: add a1, a1, t5 +; RV32ZBA-NEXT: add a1, a1, a3 +; RV32ZBA-NEXT: sltu a3, t6, t5 +; RV32ZBA-NEXT: add a1, a1, a3 +; RV32ZBA-NEXT: add a1, a5, a1 +; RV32ZBA-NEXT: add a1, a1, s2 +; RV32ZBA-NEXT: srai a3, a6, 31 +; RV32ZBA-NEXT: xor a1, a1, a3 +; RV32ZBA-NEXT: xor a3, s3, a3 +; RV32ZBA-NEXT: or a1, a3, a1 +; RV32ZBA-NEXT: snez a1, a1 +; RV32ZBA-NEXT: mul a0, a0, a2 +; RV32ZBA-NEXT: sw a0, 0(a4) +; RV32ZBA-NEXT: sw a6, 4(a4) +; RV32ZBA-NEXT: mv a0, a1 +; RV32ZBA-NEXT: lw s3, 0(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -973,26 +1063,28 @@ define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) { ; RV32-LABEL: smulo2.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: mv s0, a2 -; RV32-NEXT: sw zero, 4(sp) -; RV32-NEXT: addi a2, zero, 13 -; RV32-NEXT: addi a4, sp, 4 -; RV32-NEXT: mv a3, zero -; RV32-NEXT: call __mulodi4@plt -; RV32-NEXT: lw a2, 4(sp) -; RV32-NEXT: snez a2, a2 -; RV32-NEXT: sw a1, 4(s0) -; RV32-NEXT: sw a0, 0(s0) -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: addi a7, zero, 13 +; RV32-NEXT: mulhu a4, a0, a7 +; RV32-NEXT: mul a5, a1, a7 +; RV32-NEXT: add t0, a5, a4 +; RV32-NEXT: sltu a6, t0, a5 +; RV32-NEXT: mulhu a5, a1, a7 +; RV32-NEXT: add a5, a5, a6 +; RV32-NEXT: srai a1, a1, 31 +; RV32-NEXT: mul a3, a1, a7 +; RV32-NEXT: add a3, a5, a3 +; RV32-NEXT: srai a4, t0, 31 +; RV32-NEXT: xor a6, a3, a4 +; RV32-NEXT: sltu a3, a3, a5 +; RV32-NEXT: mulh a1, a1, a7 +; RV32-NEXT: add a1, a1, a3 +; RV32-NEXT: xor a1, a1, a4 +; RV32-NEXT: or a1, a6, a1 +; RV32-NEXT: snez a1, a1 +; RV32-NEXT: mul a0, a0, a7 +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: sw t0, 4(a2) +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: smulo2.i64: @@ -1008,26 +1100,28 @@ ; ; RV32ZBA-LABEL: smulo2.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: addi sp, sp, -16 -; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 -; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: .cfi_offset ra, -4 -; RV32ZBA-NEXT: .cfi_offset s0, -8 -; RV32ZBA-NEXT: mv s0, a2 -; RV32ZBA-NEXT: sw zero, 4(sp) -; RV32ZBA-NEXT: addi a2, zero, 13 -; RV32ZBA-NEXT: addi a4, sp, 4 -; RV32ZBA-NEXT: mv a3, zero -; RV32ZBA-NEXT: call __mulodi4@plt -; RV32ZBA-NEXT: lw a2, 4(sp) -; RV32ZBA-NEXT: snez a2, a2 -; RV32ZBA-NEXT: sw a1, 4(s0) -; RV32ZBA-NEXT: sw a0, 0(s0) -; RV32ZBA-NEXT: mv a0, a2 -; RV32ZBA-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: addi sp, sp, 16 +; 
RV32ZBA-NEXT: addi a7, zero, 13 +; RV32ZBA-NEXT: mulhu a4, a0, a7 +; RV32ZBA-NEXT: mul a5, a1, a7 +; RV32ZBA-NEXT: add t0, a5, a4 +; RV32ZBA-NEXT: sltu a6, t0, a5 +; RV32ZBA-NEXT: mulhu a5, a1, a7 +; RV32ZBA-NEXT: add a5, a5, a6 +; RV32ZBA-NEXT: srai a1, a1, 31 +; RV32ZBA-NEXT: mul a3, a1, a7 +; RV32ZBA-NEXT: add a3, a5, a3 +; RV32ZBA-NEXT: srai a4, t0, 31 +; RV32ZBA-NEXT: xor a6, a3, a4 +; RV32ZBA-NEXT: sltu a3, a3, a5 +; RV32ZBA-NEXT: mulh a1, a1, a7 +; RV32ZBA-NEXT: add a1, a1, a3 +; RV32ZBA-NEXT: xor a1, a1, a4 +; RV32ZBA-NEXT: or a1, a6, a1 +; RV32ZBA-NEXT: snez a1, a1 +; RV32ZBA-NEXT: mul a0, a0, a7 +; RV32ZBA-NEXT: sw a0, 0(a2) +; RV32ZBA-NEXT: sw t0, 4(a2) +; RV32ZBA-NEXT: mv a0, a1 ; RV32ZBA-NEXT: ret ; ; RV64ZBA-LABEL: smulo2.i64: @@ -2243,39 +2337,66 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) { ; RV32-LABEL: smulo.select.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: .cfi_offset s0, -8 -; RV32-NEXT: .cfi_offset s1, -12 -; RV32-NEXT: .cfi_offset s2, -16 -; RV32-NEXT: .cfi_offset s3, -20 -; RV32-NEXT: mv s2, a3 -; RV32-NEXT: mv s3, a2 -; RV32-NEXT: mv s0, a1 -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: call __mulodi4@plt -; RV32-NEXT: lw a0, 8(sp) -; RV32-NEXT: bnez a0, .LBB44_2 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: mulhu a4, a0, a2 +; RV32-NEXT: mul a5, a1, a2 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: mul a5, a0, a3 +; RV32-NEXT: add a7, a5, a4 +; RV32-NEXT: sltu a5, a7, a5 +; RV32-NEXT: mulhu a4, a0, a3 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add t0, a6, a4 +; RV32-NEXT: mul t1, a1, a3 +; RV32-NEXT: add a4, t1, t0 +; RV32-NEXT: srai a5, a1, 31 +; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: srai t3, a3, 31 +; RV32-NEXT: mul t4, t3, a0 +; RV32-NEXT: add t5, t4, t2 +; RV32-NEXT: add t6, a4, t5 +; RV32-NEXT: sltu s2, t6, a4 +; RV32-NEXT: sltu a4, a4, t1 +; RV32-NEXT: sltu s0, t0, a6 +; RV32-NEXT: mulhu s1, a1, a3 +; RV32-NEXT: add s1, s1, s0 +; RV32-NEXT: add a4, s1, a4 +; RV32-NEXT: mulhu s1, a2, a5 +; RV32-NEXT: add s1, s1, t2 +; RV32-NEXT: mul a5, a3, a5 +; RV32-NEXT: add a5, s1, a5 +; RV32-NEXT: mul s1, t3, a1 +; RV32-NEXT: mulhu s0, t3, a0 +; RV32-NEXT: add s1, s0, s1 +; RV32-NEXT: add s1, s1, t4 +; RV32-NEXT: add a5, s1, a5 +; RV32-NEXT: sltu s1, t5, t4 +; RV32-NEXT: add a5, a5, s1 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add a4, a4, s2 +; RV32-NEXT: srai a5, a7, 31 +; RV32-NEXT: xor a4, a4, a5 +; RV32-NEXT: xor a5, t6, a5 +; RV32-NEXT: or a4, a5, a4 +; RV32-NEXT: bnez a4, .LBB44_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: mv s1, s3 -; RV32-NEXT: mv s0, s2 +; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB44_2: # %entry -; RV32-NEXT: mv a0, s1 -; RV32-NEXT: mv a1, s0 -; RV32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32-NEXT: lw 
s1, 20(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: smulo.select.i64: @@ -2291,39 +2412,66 @@ ; ; RV32ZBA-LABEL: smulo.select.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: addi sp, sp, -32 -; RV32ZBA-NEXT: .cfi_def_cfa_offset 32 -; RV32ZBA-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: .cfi_offset ra, -4 -; RV32ZBA-NEXT: .cfi_offset s0, -8 -; RV32ZBA-NEXT: .cfi_offset s1, -12 -; RV32ZBA-NEXT: .cfi_offset s2, -16 -; RV32ZBA-NEXT: .cfi_offset s3, -20 -; RV32ZBA-NEXT: mv s2, a3 -; RV32ZBA-NEXT: mv s3, a2 -; RV32ZBA-NEXT: mv s0, a1 -; RV32ZBA-NEXT: mv s1, a0 -; RV32ZBA-NEXT: sw zero, 8(sp) -; RV32ZBA-NEXT: addi a4, sp, 8 -; RV32ZBA-NEXT: call __mulodi4@plt -; RV32ZBA-NEXT: lw a0, 8(sp) -; RV32ZBA-NEXT: bnez a0, .LBB44_2 +; RV32ZBA-NEXT: addi sp, sp, -16 +; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 +; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset s0, -4 +; RV32ZBA-NEXT: .cfi_offset s1, -8 +; RV32ZBA-NEXT: .cfi_offset s2, -12 +; RV32ZBA-NEXT: mulhu a4, a0, a2 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: mul a5, a0, a3 +; RV32ZBA-NEXT: add a7, a5, a4 +; RV32ZBA-NEXT: sltu a5, a7, a5 +; RV32ZBA-NEXT: mulhu a4, a0, a3 +; RV32ZBA-NEXT: add a4, a4, a5 +; RV32ZBA-NEXT: add t0, a6, a4 +; RV32ZBA-NEXT: mul t1, a1, a3 +; RV32ZBA-NEXT: add a4, t1, t0 +; RV32ZBA-NEXT: srai a5, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: srai t3, a3, 31 +; RV32ZBA-NEXT: mul t4, t3, a0 +; RV32ZBA-NEXT: add t5, t4, t2 +; RV32ZBA-NEXT: add t6, a4, t5 +; RV32ZBA-NEXT: sltu s2, t6, a4 +; RV32ZBA-NEXT: sltu a4, a4, t1 +; RV32ZBA-NEXT: sltu s0, t0, a6 +; RV32ZBA-NEXT: mulhu s1, a1, a3 +; RV32ZBA-NEXT: add s1, s1, s0 +; RV32ZBA-NEXT: add a4, s1, a4 +; RV32ZBA-NEXT: mulhu s1, a2, a5 +; RV32ZBA-NEXT: add s1, s1, t2 +; RV32ZBA-NEXT: mul a5, a3, a5 +; RV32ZBA-NEXT: add a5, s1, a5 +; RV32ZBA-NEXT: mul s1, t3, a1 +; RV32ZBA-NEXT: mulhu s0, t3, a0 +; RV32ZBA-NEXT: add s1, s0, s1 +; RV32ZBA-NEXT: add s1, s1, t4 +; RV32ZBA-NEXT: add a5, s1, a5 +; RV32ZBA-NEXT: sltu s1, t5, t4 +; RV32ZBA-NEXT: add a5, a5, s1 +; RV32ZBA-NEXT: add a4, a4, a5 +; RV32ZBA-NEXT: add a4, a4, s2 +; RV32ZBA-NEXT: srai a5, a7, 31 +; RV32ZBA-NEXT: xor a4, a4, a5 +; RV32ZBA-NEXT: xor a5, t6, a5 +; RV32ZBA-NEXT: or a4, a5, a4 +; RV32ZBA-NEXT: bnez a4, .LBB44_2 ; RV32ZBA-NEXT: # %bb.1: # %entry -; RV32ZBA-NEXT: mv s1, s3 -; RV32ZBA-NEXT: mv s0, s2 +; RV32ZBA-NEXT: mv a0, a2 +; RV32ZBA-NEXT: mv a1, a3 ; RV32ZBA-NEXT: .LBB44_2: # %entry -; RV32ZBA-NEXT: mv a0, s1 -; RV32ZBA-NEXT: mv a1, s0 -; RV32ZBA-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s2, 16(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload 
-; RV32ZBA-NEXT: addi sp, sp, 32 +; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; ; RV64ZBA-LABEL: smulo.select.i64: @@ -2348,14 +2496,59 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: call __mulodi4@plt -; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: mulhu a4, a0, a2 +; RV32-NEXT: mul a5, a1, a2 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: mul a5, a0, a3 +; RV32-NEXT: add a7, a5, a4 +; RV32-NEXT: sltu a5, a7, a5 +; RV32-NEXT: mulhu a4, a0, a3 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add t0, a6, a4 +; RV32-NEXT: mul t1, a1, a3 +; RV32-NEXT: add a4, t1, t0 +; RV32-NEXT: srai a5, a1, 31 +; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: srai t3, a3, 31 +; RV32-NEXT: mul t4, t3, a0 +; RV32-NEXT: add t5, t4, t2 +; RV32-NEXT: add t6, a4, t5 +; RV32-NEXT: sltu s2, t6, a4 +; RV32-NEXT: sltu a4, a4, t1 +; RV32-NEXT: sltu s0, t0, a6 +; RV32-NEXT: mulhu s1, a1, a3 +; RV32-NEXT: add s1, s1, s0 +; RV32-NEXT: add a4, s1, a4 +; RV32-NEXT: mulhu a2, a2, a5 +; RV32-NEXT: add a2, a2, t2 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: mul a1, t3, a1 +; RV32-NEXT: mulhu a0, t3, a0 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t4 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: sltu a1, t5, t4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a4, a0 +; RV32-NEXT: add a0, a0, s2 +; RV32-NEXT: srai a1, a7, 31 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: xor a1, t6, a1 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2372,14 +2565,59 @@ ; RV32ZBA: # %bb.0: # %entry ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 -; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: .cfi_offset ra, -4 -; RV32ZBA-NEXT: sw zero, 8(sp) -; RV32ZBA-NEXT: addi a4, sp, 8 -; RV32ZBA-NEXT: call __mulodi4@plt -; RV32ZBA-NEXT: lw a0, 8(sp) +; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset s0, -4 +; RV32ZBA-NEXT: .cfi_offset s1, -8 +; RV32ZBA-NEXT: .cfi_offset s2, -12 +; RV32ZBA-NEXT: mulhu a4, a0, a2 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: mul a5, a0, a3 +; RV32ZBA-NEXT: add a7, a5, a4 +; RV32ZBA-NEXT: sltu a5, a7, a5 +; RV32ZBA-NEXT: mulhu a4, a0, a3 +; RV32ZBA-NEXT: add a4, a4, a5 +; RV32ZBA-NEXT: add t0, a6, a4 +; RV32ZBA-NEXT: mul t1, a1, a3 +; RV32ZBA-NEXT: add a4, t1, t0 +; RV32ZBA-NEXT: srai a5, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: srai t3, a3, 31 
+; RV32ZBA-NEXT: mul t4, t3, a0 +; RV32ZBA-NEXT: add t5, t4, t2 +; RV32ZBA-NEXT: add t6, a4, t5 +; RV32ZBA-NEXT: sltu s2, t6, a4 +; RV32ZBA-NEXT: sltu a4, a4, t1 +; RV32ZBA-NEXT: sltu s0, t0, a6 +; RV32ZBA-NEXT: mulhu s1, a1, a3 +; RV32ZBA-NEXT: add s1, s1, s0 +; RV32ZBA-NEXT: add a4, s1, a4 +; RV32ZBA-NEXT: mulhu a2, a2, a5 +; RV32ZBA-NEXT: add a2, a2, t2 +; RV32ZBA-NEXT: mul a3, a3, a5 +; RV32ZBA-NEXT: add a2, a2, a3 +; RV32ZBA-NEXT: mul a1, t3, a1 +; RV32ZBA-NEXT: mulhu a0, t3, a0 +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: add a0, a0, t4 +; RV32ZBA-NEXT: add a0, a0, a2 +; RV32ZBA-NEXT: sltu a1, t5, t4 +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: add a0, a4, a0 +; RV32ZBA-NEXT: add a0, a0, s2 +; RV32ZBA-NEXT: srai a1, a7, 31 +; RV32ZBA-NEXT: xor a0, a0, a1 +; RV32ZBA-NEXT: xor a1, t6, a1 +; RV32ZBA-NEXT: or a0, a1, a0 ; RV32ZBA-NEXT: seqz a0, a0 -; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -3240,12 +3478,55 @@ ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: call __mulodi4@plt -; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset s0, -4 +; RV32-NEXT: .cfi_offset s1, -8 +; RV32-NEXT: .cfi_offset s2, -12 +; RV32-NEXT: mulhu a4, a0, a2 +; RV32-NEXT: mul a5, a1, a2 +; RV32-NEXT: add a4, a5, a4 +; RV32-NEXT: sltu a6, a4, a5 +; RV32-NEXT: mulhu a5, a1, a2 +; RV32-NEXT: add a6, a5, a6 +; RV32-NEXT: mul a5, a0, a3 +; RV32-NEXT: add a7, a5, a4 +; RV32-NEXT: sltu a5, a7, a5 +; RV32-NEXT: mulhu a4, a0, a3 +; RV32-NEXT: add a4, a4, a5 +; RV32-NEXT: add t0, a6, a4 +; RV32-NEXT: mul t1, a1, a3 +; RV32-NEXT: add a4, t1, t0 +; RV32-NEXT: srai a5, a1, 31 +; RV32-NEXT: mul t2, a2, a5 +; RV32-NEXT: srai t3, a3, 31 +; RV32-NEXT: mul t4, t3, a0 +; RV32-NEXT: add t5, t4, t2 +; RV32-NEXT: add t6, a4, t5 +; RV32-NEXT: sltu s2, t6, a4 +; RV32-NEXT: sltu a4, a4, t1 +; RV32-NEXT: sltu s0, t0, a6 +; RV32-NEXT: mulhu s1, a1, a3 +; RV32-NEXT: add s1, s1, s0 +; RV32-NEXT: add a4, s1, a4 +; RV32-NEXT: mulhu a2, a2, a5 +; RV32-NEXT: add a2, a2, t2 +; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: mul a1, t3, a1 +; RV32-NEXT: mulhu a0, t3, a0 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a0, t4 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: sltu a1, t5, t4 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: add a0, a4, a0 +; RV32-NEXT: add a0, a0, s2 +; RV32-NEXT: srai a1, a7, 31 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: xor a1, t6, a1 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: beqz a0, .LBB59_2 ; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: mv a0, zero @@ -3253,7 +3534,9 @@ ; RV32-NEXT: .LBB59_2: # %continue ; RV32-NEXT: addi a0, zero, 1 ; RV32-NEXT: .LBB59_3: # %overflow -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3274,12 +3557,55 @@ ; RV32ZBA: # %bb.0: # %entry ; RV32ZBA-NEXT: addi sp, sp, -16 ; RV32ZBA-NEXT: .cfi_def_cfa_offset 
16 -; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: .cfi_offset ra, -4 -; RV32ZBA-NEXT: sw zero, 8(sp) -; RV32ZBA-NEXT: addi a4, sp, 8 -; RV32ZBA-NEXT: call __mulodi4@plt -; RV32ZBA-NEXT: lw a0, 8(sp) +; RV32ZBA-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s1, 8(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: sw s2, 4(sp) # 4-byte Folded Spill +; RV32ZBA-NEXT: .cfi_offset s0, -4 +; RV32ZBA-NEXT: .cfi_offset s1, -8 +; RV32ZBA-NEXT: .cfi_offset s2, -12 +; RV32ZBA-NEXT: mulhu a4, a0, a2 +; RV32ZBA-NEXT: mul a5, a1, a2 +; RV32ZBA-NEXT: add a4, a5, a4 +; RV32ZBA-NEXT: sltu a6, a4, a5 +; RV32ZBA-NEXT: mulhu a5, a1, a2 +; RV32ZBA-NEXT: add a6, a5, a6 +; RV32ZBA-NEXT: mul a5, a0, a3 +; RV32ZBA-NEXT: add a7, a5, a4 +; RV32ZBA-NEXT: sltu a5, a7, a5 +; RV32ZBA-NEXT: mulhu a4, a0, a3 +; RV32ZBA-NEXT: add a4, a4, a5 +; RV32ZBA-NEXT: add t0, a6, a4 +; RV32ZBA-NEXT: mul t1, a1, a3 +; RV32ZBA-NEXT: add a4, t1, t0 +; RV32ZBA-NEXT: srai a5, a1, 31 +; RV32ZBA-NEXT: mul t2, a2, a5 +; RV32ZBA-NEXT: srai t3, a3, 31 +; RV32ZBA-NEXT: mul t4, t3, a0 +; RV32ZBA-NEXT: add t5, t4, t2 +; RV32ZBA-NEXT: add t6, a4, t5 +; RV32ZBA-NEXT: sltu s2, t6, a4 +; RV32ZBA-NEXT: sltu a4, a4, t1 +; RV32ZBA-NEXT: sltu s0, t0, a6 +; RV32ZBA-NEXT: mulhu s1, a1, a3 +; RV32ZBA-NEXT: add s1, s1, s0 +; RV32ZBA-NEXT: add a4, s1, a4 +; RV32ZBA-NEXT: mulhu a2, a2, a5 +; RV32ZBA-NEXT: add a2, a2, t2 +; RV32ZBA-NEXT: mul a3, a3, a5 +; RV32ZBA-NEXT: add a2, a2, a3 +; RV32ZBA-NEXT: mul a1, t3, a1 +; RV32ZBA-NEXT: mulhu a0, t3, a0 +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: add a0, a0, t4 +; RV32ZBA-NEXT: add a0, a0, a2 +; RV32ZBA-NEXT: sltu a1, t5, t4 +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: add a0, a4, a0 +; RV32ZBA-NEXT: add a0, a0, s2 +; RV32ZBA-NEXT: srai a1, a7, 31 +; RV32ZBA-NEXT: xor a0, a0, a1 +; RV32ZBA-NEXT: xor a1, t6, a1 +; RV32ZBA-NEXT: or a0, a1, a0 ; RV32ZBA-NEXT: beqz a0, .LBB59_2 ; RV32ZBA-NEXT: # %bb.1: # %overflow ; RV32ZBA-NEXT: mv a0, zero @@ -3287,7 +3613,9 @@ ; RV32ZBA-NEXT: .LBB59_2: # %continue ; RV32ZBA-NEXT: addi a0, zero, 1 ; RV32ZBA-NEXT: .LBB59_3: # %overflow -; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s2, 4(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s1, 8(sp) # 4-byte Folded Reload +; RV32ZBA-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; @@ -3319,25 +3647,50 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) { ; RV32-LABEL: smulo2.br.i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: sw zero, 8(sp) -; RV32-NEXT: addi a2, zero, -13 -; RV32-NEXT: addi a3, zero, -1 -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: call __mulodi4@plt -; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: addi a6, zero, -13 +; RV32-NEXT: mulhu a3, a0, a6 +; RV32-NEXT: mul a4, a1, a6 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: sltu a4, a3, a4 +; RV32-NEXT: mulhu a5, a1, a6 +; RV32-NEXT: add t3, a5, a4 +; RV32-NEXT: sub t0, a3, a0 +; RV32-NEXT: neg t1, a0 +; RV32-NEXT: sltu a2, t0, t1 +; RV32-NEXT: addi a7, zero, -1 +; RV32-NEXT: mulhu t2, a0, a7 +; RV32-NEXT: add a2, t2, a2 +; RV32-NEXT: add a2, t3, a2 +; RV32-NEXT: sub a5, a2, a1 +; RV32-NEXT: srai t6, a1, 31 +; RV32-NEXT: mul a4, t6, a6 +; RV32-NEXT: sub a4, a4, a0 +; RV32-NEXT: add t4, a5, a4 +; RV32-NEXT: sltu t5, t4, a5 +; RV32-NEXT: neg a3, a1 +; RV32-NEXT: sltu a3, a5, a3 +; RV32-NEXT: sltu a2, a2, t3 +; RV32-NEXT: mulhu a5, a1, a7 +; 
RV32-NEXT: add a2, a5, a2 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: sltu a3, a4, t1 +; RV32-NEXT: mulh a4, t6, a6 +; RV32-NEXT: sub a0, t2, a0 +; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: add a0, a0, a4 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: add a0, a2, a0 +; RV32-NEXT: add a0, a0, t5 +; RV32-NEXT: srai a1, t0, 31 +; RV32-NEXT: xor a0, a0, a1 +; RV32-NEXT: xor a1, t4, a1 +; RV32-NEXT: or a0, a1, a0 ; RV32-NEXT: beqz a0, .LBB60_2 ; RV32-NEXT: # %bb.1: # %overflow ; RV32-NEXT: mv a0, zero -; RV32-NEXT: j .LBB60_3 +; RV32-NEXT: ret ; RV32-NEXT: .LBB60_2: # %continue ; RV32-NEXT: addi a0, zero, 1 -; RV32-NEXT: .LBB60_3: # %overflow -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: smulo2.br.i64: @@ -3356,25 +3709,50 @@ ; ; RV32ZBA-LABEL: smulo2.br.i64: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: addi sp, sp, -16 -; RV32ZBA-NEXT: .cfi_def_cfa_offset 16 -; RV32ZBA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ZBA-NEXT: .cfi_offset ra, -4 -; RV32ZBA-NEXT: sw zero, 8(sp) -; RV32ZBA-NEXT: addi a2, zero, -13 -; RV32ZBA-NEXT: addi a3, zero, -1 -; RV32ZBA-NEXT: addi a4, sp, 8 -; RV32ZBA-NEXT: call __mulodi4@plt -; RV32ZBA-NEXT: lw a0, 8(sp) +; RV32ZBA-NEXT: addi a6, zero, -13 +; RV32ZBA-NEXT: mulhu a3, a0, a6 +; RV32ZBA-NEXT: mul a4, a1, a6 +; RV32ZBA-NEXT: add a3, a4, a3 +; RV32ZBA-NEXT: sltu a4, a3, a4 +; RV32ZBA-NEXT: mulhu a5, a1, a6 +; RV32ZBA-NEXT: add t3, a5, a4 +; RV32ZBA-NEXT: sub t0, a3, a0 +; RV32ZBA-NEXT: neg t1, a0 +; RV32ZBA-NEXT: sltu a2, t0, t1 +; RV32ZBA-NEXT: addi a7, zero, -1 +; RV32ZBA-NEXT: mulhu t2, a0, a7 +; RV32ZBA-NEXT: add a2, t2, a2 +; RV32ZBA-NEXT: add a2, t3, a2 +; RV32ZBA-NEXT: sub a5, a2, a1 +; RV32ZBA-NEXT: srai t6, a1, 31 +; RV32ZBA-NEXT: mul a4, t6, a6 +; RV32ZBA-NEXT: sub a4, a4, a0 +; RV32ZBA-NEXT: add t4, a5, a4 +; RV32ZBA-NEXT: sltu t5, t4, a5 +; RV32ZBA-NEXT: neg a3, a1 +; RV32ZBA-NEXT: sltu a3, a5, a3 +; RV32ZBA-NEXT: sltu a2, a2, t3 +; RV32ZBA-NEXT: mulhu a5, a1, a7 +; RV32ZBA-NEXT: add a2, a5, a2 +; RV32ZBA-NEXT: add a2, a2, a3 +; RV32ZBA-NEXT: sltu a3, a4, t1 +; RV32ZBA-NEXT: mulh a4, t6, a6 +; RV32ZBA-NEXT: sub a0, t2, a0 +; RV32ZBA-NEXT: sub a0, a0, a1 +; RV32ZBA-NEXT: add a0, a0, a4 +; RV32ZBA-NEXT: add a0, a0, a3 +; RV32ZBA-NEXT: add a0, a2, a0 +; RV32ZBA-NEXT: add a0, a0, t5 +; RV32ZBA-NEXT: srai a1, t0, 31 +; RV32ZBA-NEXT: xor a0, a0, a1 +; RV32ZBA-NEXT: xor a1, t4, a1 +; RV32ZBA-NEXT: or a0, a1, a0 ; RV32ZBA-NEXT: beqz a0, .LBB60_2 ; RV32ZBA-NEXT: # %bb.1: # %overflow ; RV32ZBA-NEXT: mv a0, zero -; RV32ZBA-NEXT: j .LBB60_3 +; RV32ZBA-NEXT: ret ; RV32ZBA-NEXT: .LBB60_2: # %continue ; RV32ZBA-NEXT: addi a0, zero, 1 -; RV32ZBA-NEXT: .LBB60_3: # %overflow -; RV32ZBA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32ZBA-NEXT: addi sp, sp, 16 ; RV32ZBA-NEXT: ret ; ; RV64ZBA-LABEL: smulo2.br.i64: