diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -496,7 +496,8 @@
     if (VT == MVT::i8 || VT == MVT::i16)
       return false;
 
-    return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
+    // Pass ISD::UADDO to bypass the default behavior disabling ISD::USUBO.
+    return TargetLowering::shouldFormOverflowOp(ISD::UADDO, VT, MathUsed);
   }
 
   bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
--- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll
@@ -1075,14 +1075,15 @@
 ; RV32I-NEXT: bnez a0, .LBB6_4
 ; RV32I-NEXT: .LBB6_2: # %atomicrmw.start
 ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: seqz a0, a3
-; RV32I-NEXT: sltu a1, s1, a3
-; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: addi a0, a3, -1
+; RV32I-NEXT: sltu a1, a3, a0
+; RV32I-NEXT: sltu a2, s1, a3
+; RV32I-NEXT: or a1, a1, a2
 ; RV32I-NEXT: mv a2, s1
-; RV32I-NEXT: bnez a0, .LBB6_1
+; RV32I-NEXT: bnez a1, .LBB6_1
 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV32I-NEXT: addi a2, a3, -1
+; RV32I-NEXT: mv a2, a0
 ; RV32I-NEXT: j .LBB6_1
 ; RV32I-NEXT: .LBB6_4: # %atomicrmw.end
 ; RV32I-NEXT: mv a0, a3
@@ -1114,14 +1115,15 @@
 ; RV32IA-NEXT: # =>This Loop Header: Depth=1
 ; RV32IA-NEXT: # Child Loop BB6_5 Depth 2
 ; RV32IA-NEXT: mv a3, a2
-; RV32IA-NEXT: seqz a2, a2
-; RV32IA-NEXT: sltu a4, a1, a3
-; RV32IA-NEXT: or a2, a2, a4
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: sltu a4, a3, a2
+; RV32IA-NEXT: sltu a5, a1, a3
+; RV32IA-NEXT: or a5, a4, a5
 ; RV32IA-NEXT: mv a4, a1
-; RV32IA-NEXT: bnez a2, .LBB6_1
+; RV32IA-NEXT: bnez a5, .LBB6_1
 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
 ; RV32IA-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV32IA-NEXT: addi a4, a3, -1
+; RV32IA-NEXT: mv a4, a2
 ; RV32IA-NEXT: j .LBB6_1
 ; RV32IA-NEXT: .LBB6_4: # %atomicrmw.end
 ; RV32IA-NEXT: mv a0, a2
@@ -1156,14 +1158,15 @@
 ; RV64I-NEXT: bnez a0, .LBB6_4
 ; RV64I-NEXT: .LBB6_2: # %atomicrmw.start
 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: seqz a0, a3
-; RV64I-NEXT: sltu a1, s2, a3
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: addiw a0, a3, -1
+; RV64I-NEXT: sltu a1, a3, a0
+; RV64I-NEXT: sltu a2, s2, a3
+; RV64I-NEXT: or a1, a1, a2
 ; RV64I-NEXT: mv a2, s1
-; RV64I-NEXT: bnez a0, .LBB6_1
+; RV64I-NEXT: bnez a1, .LBB6_1
 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV64I-NEXT: addiw a2, a3, -1
+; RV64I-NEXT: mv a2, a0
 ; RV64I-NEXT: j .LBB6_1
 ; RV64I-NEXT: .LBB6_4: # %atomicrmw.end
 ; RV64I-NEXT: mv a0, a3
@@ -1188,23 +1191,24 @@
 ; RV64IA-NEXT: bne a2, a4, .LBB6_7
 ; RV64IA-NEXT: # %bb.6: # %atomicrmw.start
 ; RV64IA-NEXT: # in Loop: Header=BB6_5 Depth=2
-; RV64IA-NEXT: sc.w.aqrl a6, a5, (a0)
-; RV64IA-NEXT: bnez a6, .LBB6_5
+; RV64IA-NEXT: sc.w.aqrl a5, a6, (a0)
+; RV64IA-NEXT: bnez a5, .LBB6_5
 ; RV64IA-NEXT: .LBB6_7: # %atomicrmw.start
 ; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1
 ; RV64IA-NEXT: beq a2, a4, .LBB6_4
 ; RV64IA-NEXT: .LBB6_2: # %atomicrmw.start
 ; RV64IA-NEXT: # =>This Loop Header: Depth=1
 ; RV64IA-NEXT: # Child Loop BB6_5 Depth 2
+; RV64IA-NEXT: addiw a5, a2, -1
 ; RV64IA-NEXT: sext.w a4, a2
-; RV64IA-NEXT: seqz a5, a4
+; RV64IA-NEXT: sltu a2, a4, a5
 ; RV64IA-NEXT: sltu a6, a3, a4
-; RV64IA-NEXT: or a6, a5, a6
-; RV64IA-NEXT: mv a5, a1
-; RV64IA-NEXT: bnez a6, .LBB6_1
+; RV64IA-NEXT: or a2, a2, a6
+; RV64IA-NEXT: mv a6, a1
+; RV64IA-NEXT: bnez a2, .LBB6_1
 ; RV64IA-NEXT: # %bb.3: # %atomicrmw.start
 ; RV64IA-NEXT: # in Loop: Header=BB6_2 Depth=1
-; RV64IA-NEXT: addiw a5, a2, -1
+; RV64IA-NEXT: mv a6, a5
 ; RV64IA-NEXT: j .LBB6_1
 ; RV64IA-NEXT: .LBB6_4: # %atomicrmw.end
 ; RV64IA-NEXT: mv a0, a2
@@ -1243,31 +1247,38 @@
 ; RV32I-NEXT: call __atomic_compare_exchange_8@plt
 ; RV32I-NEXT: lw a5, 12(sp)
 ; RV32I-NEXT: lw a4, 8(sp)
-; RV32I-NEXT: bnez a0, .LBB7_7
+; RV32I-NEXT: bnez a0, .LBB7_9
 ; RV32I-NEXT: .LBB7_2: # %atomicrmw.start
 ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT: beq a5, s1, .LBB7_4
+; RV32I-NEXT: addi a0, a4, -1
+; RV32I-NEXT: seqz a1, a4
+; RV32I-NEXT: sub a1, a5, a1
+; RV32I-NEXT: bnez a4, .LBB7_5
 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sltu a0, s1, a5
-; RV32I-NEXT: j .LBB7_5
+; RV32I-NEXT: sltu a2, a5, a1
+; RV32I-NEXT: bne a5, s1, .LBB7_6
 ; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: sltu a0, s2, a4
-; RV32I-NEXT: .LBB7_5: # %atomicrmw.start
+; RV32I-NEXT: sltu a3, s2, a4
+; RV32I-NEXT: j .LBB7_7
+; RV32I-NEXT: .LBB7_5: # in Loop: Header=BB7_2 Depth=1
+; RV32I-NEXT: sltu a2, a4, a0
+; RV32I-NEXT: beq a5, s1, .LBB7_4
+; RV32I-NEXT: .LBB7_6: # %atomicrmw.start
 ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: or a1, a4, a5
-; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: sltu a3, s1, a5
+; RV32I-NEXT: .LBB7_7: # %atomicrmw.start
+; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
+; RV32I-NEXT: or a6, a2, a3
 ; RV32I-NEXT: mv a2, s2
 ; RV32I-NEXT: mv a3, s1
-; RV32I-NEXT: bnez a0, .LBB7_1
-; RV32I-NEXT: # %bb.6: # %atomicrmw.start
+; RV32I-NEXT: bnez a6, .LBB7_1
+; RV32I-NEXT: # %bb.8: # %atomicrmw.start
 ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32I-NEXT: seqz a0, a4
-; RV32I-NEXT: sub a3, a5, a0
-; RV32I-NEXT: addi a2, a4, -1
+; RV32I-NEXT: mv a2, a0
+; RV32I-NEXT: mv a3, a1
 ; RV32I-NEXT: j .LBB7_1
-; RV32I-NEXT: .LBB7_7: # %atomicrmw.end
+; RV32I-NEXT: .LBB7_9: # %atomicrmw.end
 ; RV32I-NEXT: mv a0, a4
 ; RV32I-NEXT: mv a1, a5
 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1306,31 +1317,38 @@
 ; RV32IA-NEXT: call __atomic_compare_exchange_8@plt
 ; RV32IA-NEXT: lw a5, 12(sp)
 ; RV32IA-NEXT: lw a4, 8(sp)
-; RV32IA-NEXT: bnez a0, .LBB7_7
+; RV32IA-NEXT: bnez a0, .LBB7_9
 ; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start
 ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: beq a5, s1, .LBB7_4
+; RV32IA-NEXT: addi a0, a4, -1
+; RV32IA-NEXT: seqz a1, a4
+; RV32IA-NEXT: sub a1, a5, a1
+; RV32IA-NEXT: bnez a4, .LBB7_5
 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start
 ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sltu a0, s1, a5
-; RV32IA-NEXT: j .LBB7_5
+; RV32IA-NEXT: sltu a2, a5, a1
+; RV32IA-NEXT: bne a5, s1, .LBB7_6
 ; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: sltu a0, s2, a4
-; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start
+; RV32IA-NEXT: sltu a3, s2, a4
+; RV32IA-NEXT: j .LBB7_7
+; RV32IA-NEXT: .LBB7_5: # in Loop: Header=BB7_2 Depth=1
+; RV32IA-NEXT: sltu a2, a4, a0
+; RV32IA-NEXT: beq a5, s1, .LBB7_4
+; RV32IA-NEXT: .LBB7_6: # %atomicrmw.start
 ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: or a1, a4, a5
-; RV32IA-NEXT: seqz a1, a1
-; RV32IA-NEXT: or a0, a1, a0
+; RV32IA-NEXT: sltu a3, s1, a5
+; RV32IA-NEXT: .LBB7_7: # %atomicrmw.start
+; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
+; RV32IA-NEXT: or a6, a2, a3
 ; RV32IA-NEXT: mv a2, s2
 ; RV32IA-NEXT: mv a3, s1
-; RV32IA-NEXT: bnez a0, .LBB7_1
-; RV32IA-NEXT: # %bb.6: # %atomicrmw.start
+; RV32IA-NEXT: bnez a6, .LBB7_1
+; RV32IA-NEXT: # %bb.8: # %atomicrmw.start
 ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV32IA-NEXT: seqz a0, a4
-; RV32IA-NEXT: sub a3, a5, a0
-; RV32IA-NEXT: addi a2, a4, -1
+; RV32IA-NEXT: mv a2, a0
+; RV32IA-NEXT: mv a3, a1
 ; RV32IA-NEXT: j .LBB7_1
-; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end
+; RV32IA-NEXT: .LBB7_9: # %atomicrmw.end
 ; RV32IA-NEXT: mv a0, a4
 ; RV32IA-NEXT: mv a1, a5
 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
@@ -1366,14 +1384,15 @@
 ; RV64I-NEXT: bnez a0, .LBB7_4
 ; RV64I-NEXT: .LBB7_2: # %atomicrmw.start
 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT: seqz a0, a3
-; RV64I-NEXT: sltu a1, s1, a3
-; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: addi a0, a3, -1
+; RV64I-NEXT: sltu a1, a3, a0
+; RV64I-NEXT: sltu a2, s1, a3
+; RV64I-NEXT: or a1, a1, a2
 ; RV64I-NEXT: mv a2, s1
-; RV64I-NEXT: bnez a0, .LBB7_1
+; RV64I-NEXT: bnez a1, .LBB7_1
 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV64I-NEXT: addi a2, a3, -1
+; RV64I-NEXT: mv a2, a0
 ; RV64I-NEXT: j .LBB7_1
 ; RV64I-NEXT: .LBB7_4: # %atomicrmw.end
 ; RV64I-NEXT: mv a0, a3
@@ -1405,14 +1424,15 @@
 ; RV64IA-NEXT: # =>This Loop Header: Depth=1
 ; RV64IA-NEXT: # Child Loop BB7_5 Depth 2
 ; RV64IA-NEXT: mv a3, a2
-; RV64IA-NEXT: seqz a2, a2
-; RV64IA-NEXT: sltu a4, a1, a3
-; RV64IA-NEXT: or a2, a2, a4
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: sltu a4, a3, a2
+; RV64IA-NEXT: sltu a5, a1, a3
+; RV64IA-NEXT: or a5, a4, a5
 ; RV64IA-NEXT: mv a4, a1
-; RV64IA-NEXT: bnez a2, .LBB7_1
+; RV64IA-NEXT: bnez a5, .LBB7_1
 ; RV64IA-NEXT: # %bb.3: # %atomicrmw.start
 ; RV64IA-NEXT: # in Loop: Header=BB7_2 Depth=1
-; RV64IA-NEXT: addi a4, a3, -1
+; RV64IA-NEXT: mv a4, a2
 ; RV64IA-NEXT: j .LBB7_1
 ; RV64IA-NEXT: .LBB7_4: # %atomicrmw.end
 ; RV64IA-NEXT: mv a0, a2
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -774,24 +774,26 @@
 define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
 ; RV32-LABEL: usubo_ult_i64_math_overflow_used:
 ; RV32: # %bb.0:
-; RV32-NEXT: mv a5, a0
-; RV32-NEXT: sltu a0, a0, a2
-; RV32-NEXT: sub a6, a1, a3
-; RV32-NEXT: sub a6, a6, a0
-; RV32-NEXT: sub a5, a5, a2
-; RV32-NEXT: sw a5, 0(a4)
-; RV32-NEXT: sw a6, 4(a4)
-; RV32-NEXT: beq a1, a3, .LBB22_2
+; RV32-NEXT: sltu a5, a0, a2
+; RV32-NEXT: sub a3, a1, a3
+; RV32-NEXT: sub a3, a3, a5
+; RV32-NEXT: sub a2, a0, a2
+; RV32-NEXT: beq a3, a1, .LBB22_2
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: sltu a0, a1, a3
+; RV32-NEXT: j .LBB22_3
 ; RV32-NEXT: .LBB22_2:
+; RV32-NEXT: sltu a0, a0, a2
+; RV32-NEXT: .LBB22_3:
+; RV32-NEXT: sw a2, 0(a4)
+; RV32-NEXT: sw a3, 4(a4)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: usubo_ult_i64_math_overflow_used:
 ; RV64: # %bb.0:
-; RV64-NEXT: sub a3, a0, a1
+; RV64-NEXT: sub a1, a0, a1
 ; RV64-NEXT: sltu a0, a0, a1
-; RV64-NEXT: sd a3, 0(a2)
+; RV64-NEXT: sd a1, 0(a2)
 ; RV64-NEXT: ret
 %s = sub i64 %x, %y
 store i64 %s, ptr %p
@@ -804,20 +806,17 @@
 define i1 @usubo_ugt_i32(i32 %x, i32 %y, ptr %p) {
 ; RV32-LABEL: usubo_ugt_i32:
 ; RV32: # %bb.0:
-; RV32-NEXT: sltu a3, a0, a1
-; RV32-NEXT: sub a0, a0, a1
-; RV32-NEXT: sw a0, 0(a2)
-; RV32-NEXT: mv a0, a3
+; RV32-NEXT: sub a1, a0, a1
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: sw a1, 0(a2)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: usubo_ugt_i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: sext.w a3, a1
-; RV64-NEXT: sext.w a4, a0
-; RV64-NEXT: sltu a3, a4, a3
-; RV64-NEXT: subw a0, a0, a1
-; RV64-NEXT: sw a0, 0(a2)
-; RV64-NEXT: mv a0, a3
+; RV64-NEXT: subw a1, a0, a1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: sw a1, 0(a2)
 ; RV64-NEXT: ret
 %ov = icmp ugt i32 %y, %x
 %s = sub i32 %x, %y
@@ -939,16 +938,16 @@
 ; RV32-LABEL: usubo_eq_constant1_op1_i32:
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi a2, a0, -1
-; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: sltu a0, a0, a2
 ; RV32-NEXT: sw a2, 0(a1)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: usubo_eq_constant1_op1_i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: sext.w a2, a0
-; RV64-NEXT: addiw a3, a0, -1
-; RV64-NEXT: seqz a0, a2
-; RV64-NEXT: sw a3, 0(a1)
+; RV64-NEXT: addiw a2, a0, -1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: sltu a0, a0, a2
+; RV64-NEXT: sw a2, 0(a1)
 ; RV64-NEXT: ret
 %s = add i32 %x, -1
 %ov = icmp eq i32 %x, 0
@@ -962,16 +961,15 @@
 ; RV32-LABEL: usubo_ne_constant0_op1_i32:
 ; RV32: # %bb.0:
 ; RV32-NEXT: neg a2, a0
-; RV32-NEXT: snez a0, a0
+; RV32-NEXT: snez a0, a2
 ; RV32-NEXT: sw a2, 0(a1)
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: usubo_ne_constant0_op1_i32:
 ; RV64: # %bb.0:
-; RV64-NEXT: sext.w a2, a0
-; RV64-NEXT: negw a3, a0
+; RV64-NEXT: negw a2, a0
 ; RV64-NEXT: snez a0, a2
-; RV64-NEXT: sw a3, 0(a1)
+; RV64-NEXT: sw a2, 0(a1)
 ; RV64-NEXT: ret
 %s = sub i32 0, %x
 %ov = icmp ne i32 %x, 0
@@ -1058,41 +1056,43 @@
 ; RV32-NEXT: .cfi_offset s4, -24
 ; RV32-NEXT: .cfi_offset s5, -28
 ; RV32-NEXT: .cfi_offset s6, -32
-; RV32-NEXT: mv s4, a5
+; RV32-NEXT: mv s1, a5
 ; RV32-NEXT: andi a5, a5, 1
-; RV32-NEXT: beqz a5, .LBB31_8
+; RV32-NEXT: beqz a5, .LBB31_7
 ; RV32-NEXT: # %bb.1: # %t
 ; RV32-NEXT: mv s0, a4
-; RV32-NEXT: mv s3, a3
-; RV32-NEXT: mv s1, a2
-; RV32-NEXT: mv s5, a1
-; RV32-NEXT: mv s2, a0
+; RV32-NEXT: mv s5, a3
+; RV32-NEXT: mv s4, a2
+; RV32-NEXT: mv s2, a1
+; RV32-NEXT: mv s3, a0
 ; RV32-NEXT: beq a1, a3, .LBB31_3
 ; RV32-NEXT: # %bb.2: # %t
-; RV32-NEXT: sltu s6, s5, s3
+; RV32-NEXT: sltu s6, s2, s5
 ; RV32-NEXT: j .LBB31_4
 ; RV32-NEXT: .LBB31_3:
-; RV32-NEXT: sltu s6, s2, s1
+; RV32-NEXT: sltu s6, s3, s4
 ; RV32-NEXT: .LBB31_4: # %t
 ; RV32-NEXT: mv a0, s6
 ; RV32-NEXT: call call@plt
-; RV32-NEXT: beqz s6, .LBB31_8
+; RV32-NEXT: beqz s6, .LBB31_7
 ; RV32-NEXT: # %bb.5: # %end
-; RV32-NEXT: sltu a1, s2, s1
-; RV32-NEXT: mv a0, a1
-; RV32-NEXT: beq s5, s3, .LBB31_7
+; RV32-NEXT: sltu a0, s3, s4
+; RV32-NEXT: sub a1, s2, s5
+; RV32-NEXT: sub a1, a1, a0
+; RV32-NEXT: sub a2, s3, s4
+; RV32-NEXT: beq a1, s2, .LBB31_8
 ; RV32-NEXT: # %bb.6: # %end
-; RV32-NEXT: sltu a0, s5, s3
-; RV32-NEXT: .LBB31_7: # %end
-; RV32-NEXT: sub a2, s5, s3
-; RV32-NEXT: sub a2, a2, a1
-; RV32-NEXT: sub a1, s2, s1
-; RV32-NEXT: sw a1, 0(s0)
-; RV32-NEXT: sw a2, 4(s0)
+; RV32-NEXT: sltu a0, s2, a1
 ; RV32-NEXT: j .LBB31_9
-; RV32-NEXT: .LBB31_8: # %f
-; RV32-NEXT: mv a0, s4
-; RV32-NEXT: .LBB31_9: # %f
+; RV32-NEXT: .LBB31_7: # %f
+; RV32-NEXT: mv a0, s1
+; RV32-NEXT: j .LBB31_10
+; RV32-NEXT: .LBB31_8:
+; RV32-NEXT: sltu a0, s3, a2
+; RV32-NEXT: .LBB31_9: # %end
+; RV32-NEXT: sw a2, 0(s0)
+; RV32-NEXT: sw a1, 4(s0)
+; RV32-NEXT: .LBB31_10: # %f
 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload
@@ -1113,28 +1113,25 @@
 ; RV64-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s4, 0(sp) # 8-byte Folded Spill
 ; RV64-NEXT: .cfi_offset ra, -8
 ; RV64-NEXT: .cfi_offset s0, -16
 ; RV64-NEXT: .cfi_offset s1, -24
 ; RV64-NEXT: .cfi_offset s2, -32
 ; RV64-NEXT: .cfi_offset s3, -40
-; RV64-NEXT: .cfi_offset s4, -48
 ; RV64-NEXT: mv s0, a3
 ; RV64-NEXT: andi a3, a3, 1
 ; RV64-NEXT: beqz a3, .LBB31_3
 ; RV64-NEXT: # %bb.1: # %t
 ; RV64-NEXT: mv s1, a2
-; RV64-NEXT: mv s2, a1
-; RV64-NEXT: mv s3, a0
-; RV64-NEXT: sltu s4, a0, a1
-; RV64-NEXT: mv a0, s4
+; RV64-NEXT: mv s3, a1
+; RV64-NEXT: mv s2, a0
+; RV64-NEXT: sltu a0, a0, a1
 ; RV64-NEXT: call call@plt
-; RV64-NEXT: bgeu s3, s2, .LBB31_3
+; RV64-NEXT: bgeu s2, s3, .LBB31_3
 ; RV64-NEXT: # %bb.2: # %end
-; RV64-NEXT: sub a0, s3, s2
-; RV64-NEXT: sd a0, 0(s1)
-; RV64-NEXT: mv a0, s4
+; RV64-NEXT: sub a1, s2, s3
+; RV64-NEXT: sltu a0, s2, a1
+; RV64-NEXT: sd a1, 0(s1)
 ; RV64-NEXT: j .LBB31_4
 ; RV64-NEXT: .LBB31_3: # %f
 ; RV64-NEXT: mv a0, s0
@@ -1144,7 +1141,6 @@
 ; RV64-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
 ; RV64-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64-NEXT: ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT: addi sp, sp, 48
 ; RV64-NEXT: ret
 entry:
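Editorial note (not part of the patch): the regenerated checks above reflect CodeGenPrepare now being allowed to form the unsigned-subtract overflow intrinsic on RISC-V. A minimal IR-level sketch of that rewrite follows; the function name is hypothetical and the "after" form is only an approximation of the expected CodeGenPrepare output, mirroring the usubo_ugt_i32 / usubo_ult_* patterns tested in overflow-intrinsics.ll.

; Input pattern (hypothetical function, same shape as usubo_ugt_i32 above):
; the subtraction and its unsigned-overflow test are computed separately.
define i1 @usubo_sketch(i32 %x, i32 %y, ptr %p) {
  %s = sub i32 %x, %y
  store i32 %s, ptr %p
  %ov = icmp ult i32 %x, %y       ; x - y wraps exactly when x < y (unsigned)
  ret i1 %ov
}

; With shouldFormOverflowOp no longer rejecting the subtract case, CodeGenPrepare
; is expected to fold the pair into a single overflow intrinsic, roughly:
;   %t  = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
;   %s  = extractvalue { i32, i1 } %t, 0
;   %ov = extractvalue { i32, i1 } %t, 1
; which is what lets the backend emit the compact sub + sltu sequences checked above.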