Index: include/llvm/ADT/APInt.h =================================================================== --- include/llvm/ADT/APInt.h +++ include/llvm/ADT/APInt.h @@ -1105,6 +1105,10 @@ APInt sshl_ov(const APInt &Amt, bool &Overflow) const; APInt ushl_ov(const APInt &Amt, bool &Overflow) const; + // Operations that saturate + APInt sadd_sat(const APInt &RHS) const; + APInt uadd_sat(const APInt &RHS) const; + /// Array-indexing support. /// /// \returns the bit value at bitPosition Index: lib/Analysis/ConstantFolding.cpp =================================================================== --- lib/Analysis/ConstantFolding.cpp +++ lib/Analysis/ConstantFolding.cpp @@ -1399,6 +1399,8 @@ case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: + case Intrinsic::sadd_sat: + case Intrinsic::uadd_sat: + case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: case Intrinsic::bitreverse: @@ -2019,6 +2021,10 @@ }; return ConstantStruct::get(cast<StructType>(Ty), Ops); } + case Intrinsic::uadd_sat: + return ConstantInt::get(Ty, Op1->getValue().uadd_sat(Op2->getValue())); + case Intrinsic::sadd_sat: + return ConstantInt::get(Ty, Op1->getValue().sadd_sat(Op2->getValue())); + case Intrinsic::cttz: if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. 
return UndefValue::get(Ty); Index: lib/Support/APInt.cpp =================================================================== --- lib/Support/APInt.cpp +++ lib/Support/APInt.cpp @@ -1947,7 +1947,27 @@ return *this << ShAmt; } +APInt APInt::sadd_sat(const APInt &RHS) const { + bool Overflow; + APInt Res = sadd_ov(RHS, Overflow); + if (Overflow) { + if (isNegative()) { + Res = APInt::getSignedMinValue(Res.getBitWidth()); + } else { + Res = APInt::getSignedMaxValue(Res.getBitWidth()); + } + } + return Res; +} +APInt APInt::uadd_sat(const APInt &RHS) const { + bool Overflow; + APInt Res = uadd_ov(RHS, Overflow); + if (Overflow) { + Res = APInt::getMaxValue(Res.getBitWidth()); + } + return Res; +} void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2019,6 +2019,78 @@ break; } + case Intrinsic::uadd_sat: + case Intrinsic::sadd_sat: { + Value *Arg0 = II->getArgOperand(0); + Value *Arg1 = II->getArgOperand(1); + if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) { + // Canonicalize constants into the RHS + II->setArgOperand(0, Arg1); + II->setArgOperand(1, Arg0); + return II; + } + + if (auto *C = dyn_cast<ConstantInt>(Arg1)) { + const APInt Val = C->getValue(); + bool IsUnsigned = II->getIntrinsicID() == Intrinsic::uadd_sat; + + // sat(Arg0 + 0) -> Arg0 + if (Val.isNullValue()) + return replaceInstUsesWith(*II, Arg0); + + // sat(Arg0 uadd MAX) -> MAX + if (IsUnsigned && Val.isMaxValue()) + return replaceInstUsesWith(*II, Arg1); + + // Make use of known overflow information + if (IsUnsigned) { + OverflowResult OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II); + if (OR == OverflowResult::NeverOverflows) + return replaceInstUsesWith(*II, Builder.CreateNUWAdd(Arg0, Arg1)); + + if (OR == OverflowResult::AlwaysOverflows) { + Type *Ty = II->getType(); + return 
replaceInstUsesWith(*II, ConstantInt::get(Ty, + APInt::getMaxValue(Ty->getPrimitiveSizeInBits()))); + } + } else { + if (willNotOverflowSignedAdd(Arg0, Arg1, *II)) + return replaceInstUsesWith(*II, Builder.CreateNSWAdd(Arg0, Arg1)); + } + + // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2)) + // if Val and Val2 have the same sign + if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) { + Value *X; + const APInt *Val2; + APInt NewVal; + if (Other->getIntrinsicID() == II->getIntrinsicID() && + match(Other->getArgOperand(0), m_Value(X)) && + match(Other->getArgOperand(1), m_APInt(Val2))) { + if (IsUnsigned) + NewVal = Val.uadd_sat(*Val2); + else if (Val.isNonNegative() == Val2->isNonNegative()) { + bool Overflow; + NewVal = Val.sadd_ov(*Val2, Overflow); + if (Overflow) { + // Both adds together may add more than SignedMaxValue + // without saturating the final result + break; + } + } else { + // Cannot fold saturated addition with different signs + break; + } + + return replaceInstUsesWith(*II, Builder.CreateBinaryIntrinsic( + II->getIntrinsicID(), X, + ConstantInt::get(II->getType(), NewVal))); + } + } + } + break; + } + case Intrinsic::minnum: case Intrinsic::maxnum: case Intrinsic::minimum: Index: test/Transforms/InstCombine/saturating-add.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/saturating-add.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -instcombine -S | FileCheck %s + +declare void @dummy(i8) +declare i8 @llvm.uadd.sat.i8(i8, i8) +declare i8 @llvm.sadd.sat.i8(i8, i8) + +; Simple constant folding +define void @test1() { +; CHECK-LABEL: @test1( +; CHECK-NEXT: call void @dummy(i8 30) +; CHECK-NEXT: call void @dummy(i8 -1) +; CHECK-NEXT: call void @dummy(i8 -10) +; CHECK-NEXT: call void @dummy(i8 127) +; CHECK-NEXT: call void @dummy(i8 -128) +; CHECK-NEXT: ret void +; + %x1 = call i8 @llvm.uadd.sat.i8(i8 10, i8 20) + call void @dummy(i8 %x1) + 
%x2 = call i8 @llvm.uadd.sat.i8(i8 250, i8 100) + call void @dummy(i8 %x2) + + %y1 = call i8 @llvm.sadd.sat.i8(i8 10, i8 -20) + call void @dummy(i8 %y1) + %y2 = call i8 @llvm.sadd.sat.i8(i8 120, i8 10) + call void @dummy(i8 %y2) + %y3 = call i8 @llvm.sadd.sat.i8(i8 -120, i8 -10) + call void @dummy(i8 %y3) + + ret void +} + +; Zero and max folding +define void @test2(i8 %a) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: call void @dummy(i8 [[A:%.*]]) +; CHECK-NEXT: call void @dummy(i8 [[A]]) +; CHECK-NEXT: call void @dummy(i8 -1) +; CHECK-NEXT: call void @dummy(i8 [[A]]) +; CHECK-NEXT: call void @dummy(i8 [[A]]) +; CHECK-NEXT: [[Y3:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 -1) +; CHECK-NEXT: call void @dummy(i8 [[Y3]]) +; CHECK-NEXT: ret void +; + %x1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 0) + call void @dummy(i8 %x1) + %x2 = call i8 @llvm.uadd.sat.i8(i8 0, i8 %a) + call void @dummy(i8 %x2) + %x3 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 255) + call void @dummy(i8 %x3) + + %y1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 0) + call void @dummy(i8 %y1) + %y2 = call i8 @llvm.sadd.sat.i8(i8 0, i8 %a) + call void @dummy(i8 %y2) + %y3 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 255) + call void @dummy(i8 %y3) + + ret void +} + +; Folding of two saturating adds +define void @test3(i8 %a) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A:%.*]], i8 30) +; CHECK-NEXT: call void @dummy(i8 [[TMP1]]) +; CHECK-NEXT: call void @dummy(i8 -1) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 30) +; CHECK-NEXT: call void @dummy(i8 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 -30) +; CHECK-NEXT: call void @dummy(i8 [[TMP3]]) +; CHECK-NEXT: [[V1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 10) +; CHECK-NEXT: [[V2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[V1]], i8 -20) +; CHECK-NEXT: call void @dummy(i8 [[V2]]) +; CHECK-NEXT: [[W1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 100) +; CHECK-NEXT: [[W2:%.*]] = call 
i8 @llvm.sadd.sat.i8(i8 [[W1]], i8 100) +; CHECK-NEXT: call void @dummy(i8 [[W2]]) +; CHECK-NEXT: ret void +; + %x1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 10) + %x2 = call i8 @llvm.uadd.sat.i8(i8 %x1, i8 20) + call void @dummy(i8 %x2) + + %y1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 100) + %y2 = call i8 @llvm.uadd.sat.i8(i8 %y1, i8 200) + call void @dummy(i8 %y2) + + %z1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 10) + %z2 = call i8 @llvm.sadd.sat.i8(i8 %z1, i8 20) + call void @dummy(i8 %z2) + + %u1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 -10) + %u2 = call i8 @llvm.sadd.sat.i8(i8 %u1, i8 -20) + call void @dummy(i8 %u2) + + %v1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 10) + %v2 = call i8 @llvm.sadd.sat.i8(i8 %v1, i8 -20) + call void @dummy(i8 %v2) + + %w1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 100) + %w2 = call i8 @llvm.sadd.sat.i8(i8 %w1, i8 100) + call void @dummy(i8 %w2) + + ret void +} + +; Use of known overflow/no-overflow information +define void @test4(i8 %a) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: [[A_NEG:%.*]] = or i8 [[A:%.*]], -128 +; CHECK-NEXT: [[A_NNEG:%.*]] = and i8 [[A]], 127 +; CHECK-NEXT: call void @dummy(i8 -1) +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[A_NNEG]], 10 +; CHECK-NEXT: call void @dummy(i8 [[TMP1]]) +; CHECK-NEXT: [[Y1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A_NEG]], i8 -10) +; CHECK-NEXT: call void @dummy(i8 [[Y1]]) +; CHECK-NEXT: [[Y2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A_NNEG]], i8 10) +; CHECK-NEXT: call void @dummy(i8 [[Y2]]) +; CHECK-NEXT: [[TMP2:%.*]] = add nsw i8 [[A_NEG]], 10 +; CHECK-NEXT: call void @dummy(i8 [[TMP2]]) +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i8 [[A_NNEG]], -10 +; CHECK-NEXT: call void @dummy(i8 [[TMP3]]) +; CHECK-NEXT: ret void +; + %a_neg = or i8 %a, -128 + %a_nneg = and i8 %a, 127 + + %x1 = call i8 @llvm.uadd.sat.i8(i8 %a_neg, i8 -10) + call void @dummy(i8 %x1) + + %x2 = call i8 @llvm.uadd.sat.i8(i8 %a_nneg, i8 10) + call void @dummy(i8 %x2) + + %y1 = call i8 @llvm.sadd.sat.i8(i8 %a_neg, i8 -10) + call void @dummy(i8 %y1) 
+ + %y2 = call i8 @llvm.sadd.sat.i8(i8 %a_nneg, i8 10) + call void @dummy(i8 %y2) + + %y3 = call i8 @llvm.sadd.sat.i8(i8 %a_neg, i8 10) + call void @dummy(i8 %y3) + + %y4 = call i8 @llvm.sadd.sat.i8(i8 %a_nneg, i8 -10) + call void @dummy(i8 %y4) + + ret void +}