diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9737,6 +9737,37 @@
     return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
   }
 
+  if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
+    APInt MinVal = APInt::getSignedMinValue(BitWidth);
+    APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+
+    KnownBits KnownLHS = DAG.computeKnownBits(LHS);
+    KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+
+    // If the sign of either operand is known, the operation can saturate in
+    // only one direction: towards SIGNED_MAX if that operand is non-negative,
+    // or towards SIGNED_MIN if it is negative.
+    //
+    // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so
+    // the sign of 'y' has to be flipped.
+
+    bool LHSIsNonNegative = KnownLHS.isNonNegative();
+    bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
+                                                   : KnownRHS.isNegative();
+    if (LHSIsNonNegative || RHSIsNonNegative) {
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
+    }
+
+    bool LHSIsNegative = KnownLHS.isNegative();
+    bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
+                                                : KnownRHS.isNonNegative();
+    if (LHSIsNegative || RHSIsNegative) {
+      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+      return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
+    }
+  }
+
   // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
   APInt MinVal = APInt::getSignedMinValue(BitWidth);
   SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll b/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-saturating-arithmetic.ll
@@ -0,0 +1,209 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O3 -o - %s | FileCheck %s --check-prefixes=CHECK
+
+define i64 @test_ssub_nonneg_rhs(i64 %x) {
+; CHECK-LABEL: test_ssub_nonneg_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    subs x9, x0, #1
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 1)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_neg_rhs(i64 %x) {
+; CHECK-LABEL: test_ssub_neg_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    adds x9, x0, #1
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 -1)
+  ret i64 %sat
+}
+
+define i64 @test_sadd_nonneg_rhs(i64 %x) {
+; CHECK-LABEL: test_sadd_nonneg_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    adds x9, x0, #1
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 1)
+  ret i64 %sat
+}
+
+
+define i64 @test_sadd_neg_rhs(i64 %x) {
+; CHECK-LABEL: test_sadd_neg_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    subs x9, x0, #1
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 -1)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_nonneg_lhs(i64 %x) {
+; CHECK-LABEL: test_ssub_nonneg_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    mov x9, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x9, x8, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.ssub.sat.i64(i64 1, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_neg_lhs(i64 %x) {
+; CHECK-LABEL: test_ssub_neg_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x9, x8, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.ssub.sat.i64(i64 -1, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_sadd_nonneg_lhs(i64 %x) {
+; CHECK-LABEL: test_sadd_nonneg_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    adds x9, x0, #1
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.sadd.sat.i64(i64 1, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_sadd_neg_lhs(i64 %x) {
+; CHECK-LABEL: test_sadd_neg_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    subs x9, x0, #1
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %sat = call i64 @llvm.sadd.sat.i64(i64 -1, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_nonneg_rhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_ssub_nonneg_rhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #123 // =0x7b
+; CHECK-NEXT:    mov x9, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    and x8, x0, x8
+; CHECK-NEXT:    subs x8, x0, x8
+; CHECK-NEXT:    csel x0, x9, x8, vs
+; CHECK-NEXT:    ret
+  %y = and i64 %x, 123
+  %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_neg_rhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_ssub_neg_rhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmn x0, #1
+; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    csinv x9, x0, xzr, lt
+; CHECK-NEXT:    subs x9, x0, x9
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %y = call i64 @llvm.smin(i64 %x, i64 -1)
+  %sat = call i64 @llvm.ssub.sat.i64(i64 %x, i64 %y)
+  ret i64 %sat
+}
+
+define i64 @test_sadd_nonneg_rhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_sadd_nonneg_rhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x0, #1
+; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    csinc x9, x0, xzr, gt
+; CHECK-NEXT:    adds x9, x0, x9
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %y = call i64 @llvm.smax(i64 %x, i64 1)
+  %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
+  ret i64 %sat
+}
+
+
+define i64 @test_sadd_neg_rhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_sadd_neg_rhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x9, x0, #0x8000000000000000
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    adds x9, x0, x9
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %y = or i64 %x, u0x8000000000000000
+  %sat = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_nonneg_lhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_ssub_nonneg_lhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #123 // =0x7b
+; CHECK-NEXT:    mov x9, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    and x8, x0, x8
+; CHECK-NEXT:    subs x8, x8, x0
+; CHECK-NEXT:    csel x0, x9, x8, vs
+; CHECK-NEXT:    ret
+  %y = and i64 %x, 123
+  %sat = call i64 @llvm.ssub.sat.i64(i64 %y, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_ssub_neg_lhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_ssub_neg_lhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmn x0, #1
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    csinv x9, x0, xzr, lt
+; CHECK-NEXT:    subs x9, x9, x0
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %y = call i64 @llvm.smin(i64 %x, i64 -1)
+  %sat = call i64 @llvm.ssub.sat.i64(i64 %y, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_sadd_nonneg_lhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_sadd_nonneg_lhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    cmp x0, #1
+; CHECK-NEXT:    mov x8, #9223372036854775807 // =0x7fffffffffffffff
+; CHECK-NEXT:    csinc x9, x0, xzr, gt
+; CHECK-NEXT:    adds x9, x9, x0
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %y = call i64 @llvm.smax(i64 %x, i64 1)
+  %sat = call i64 @llvm.sadd.sat.i64(i64 %y, i64 %x)
+  ret i64 %sat
+}
+
+define i64 @test_sadd_neg_lhs_nonconst(i64 %x) {
+; CHECK-LABEL: test_sadd_neg_lhs_nonconst:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    orr x9, x0, #0x8000000000000000
+; CHECK-NEXT:    mov x8, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT:    adds x9, x9, x0
+; CHECK-NEXT:    csel x0, x8, x9, vs
+; CHECK-NEXT:    ret
+  %y = or i64 %x, u0x8000000000000000
+  %sat = call i64 @llvm.sadd.sat.i64(i64 %y, i64 %x)
+  ret i64 %sat
+}
+
+declare i64 @llvm.sadd.sat.i64(i64, i64)
+declare i64 @llvm.ssub.sat.i64(i64, i64)
+declare i64 @llvm.smax(i64, i64)
+declare i64 @llvm.smin(i64, i64)
diff --git a/llvm/test/CodeGen/X86/combine-add-ssat.ll b/llvm/test/CodeGen/X86/combine-add-ssat.ll
--- a/llvm/test/CodeGen/X86/combine-add-ssat.ll
+++ b/llvm/test/CodeGen/X86/combine-add-ssat.ll
@@ -77,11 +77,8 @@
 define i32 @combine_constant_i32(i32 %a0) {
 ; CHECK-LABEL: combine_constant_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    leal 1(%rdi), %eax
-; CHECK-NEXT:    sarl $31, %eax
-; CHECK-NEXT:    addl $-2147483648, %eax # imm = 0x80000000
 ; CHECK-NEXT:    incl %edi
+; CHECK-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
 ; CHECK-NEXT:    cmovnol %edi, %eax
 ; CHECK-NEXT:    retq
   %res = call i32 @llvm.sadd.sat.i32(i32 1, i32 %a0)
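
Reviewer note: for anyone who wants to convince themselves of the sign argument in the TargetLowering comment above, here is a small standalone C++ program (illustrative only, not part of the patch; sadd_sat/ssub_sat are hypothetical reference helpers, not LLVM APIs) that exhaustively checks the claim at 8 bits: once the sign of one operand is known, sadd.sat/ssub.sat can saturate in only one direction, with the RHS sign test flipped for subtraction.

// Exhaustive 8-bit check of the known-sign saturation-direction claim.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Reference saturating ops: compute in a wider type, then record which
// bound (if any) the result clamps to.
static int8_t sadd_sat(int8_t X, int8_t Y, bool &SatMax, bool &SatMin) {
  int Wide = int(X) + int(Y);
  SatMax = Wide > INT8_MAX;
  SatMin = Wide < INT8_MIN;
  return SatMax ? INT8_MAX : SatMin ? INT8_MIN : int8_t(Wide);
}

static int8_t ssub_sat(int8_t X, int8_t Y, bool &SatMax, bool &SatMin) {
  int Wide = int(X) - int(Y);
  SatMax = Wide > INT8_MAX;
  SatMin = Wide < INT8_MIN;
  return SatMax ? INT8_MAX : SatMin ? INT8_MIN : int8_t(Wide);
}

int main() {
  for (int X = INT8_MIN; X <= INT8_MAX; ++X) {
    for (int Y = INT8_MIN; Y <= INT8_MAX; ++Y) {
      bool SatMax, SatMin;

      // sadd.sat: a non-negative operand rules out saturation at SIGNED_MIN;
      // a negative operand rules out saturation at SIGNED_MAX.
      sadd_sat(int8_t(X), int8_t(Y), SatMax, SatMin);
      if (X >= 0 || Y >= 0)
        assert(!SatMin);
      if (X < 0 || Y < 0)
        assert(!SatMax);

      // ssub.sat: X - Y acts like X + (-Y), so the RHS sign test is flipped.
      // This also covers Y == INT8_MIN, where X - Y can only move upwards.
      ssub_sat(int8_t(X), int8_t(Y), SatMax, SatMin);
      if (X >= 0 || Y < 0)
        assert(!SatMin);
      if (X < 0 || Y >= 0)
        assert(!SatMax);
    }
  }
  puts("all 65536 operand pairs verified");
}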