Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -4001,13 +4001,11 @@
     if (LHSKnown.isNegative() && RHSKnown.isNegative()) {
       // The sign bit is set in both cases: this MUST overflow.
-      // Create a simple add instruction, and insert it into the struct.
       return OverflowResult::AlwaysOverflows;
     }
 
     if (LHSKnown.isNonNegative() && RHSKnown.isNonNegative()) {
       // The sign bit is clear in both cases: this CANNOT overflow.
-      // Create a simple add instruction, and insert it into the struct.
       return OverflowResult::NeverOverflows;
     }
   }
@@ -4124,11 +4122,18 @@
                                                    AssumptionCache *AC,
                                                    const Instruction *CxtI,
                                                    const DominatorTree *DT) {
-  // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
   KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT);
-  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
-  if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
-    return OverflowResult::NeverOverflows;
+  if (LHSKnown.isNonNegative() || LHSKnown.isNegative()) {
+    KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT);
+
+    // If the LHS is negative and the RHS is non-negative, no unsigned wrap.
+    if (LHSKnown.isNegative() && RHSKnown.isNonNegative())
+      return OverflowResult::NeverOverflows;
+
+    // If the LHS is non-negative and the RHS negative, we always wrap.
+    if (LHSKnown.isNonNegative() && RHSKnown.isNegative())
+      return OverflowResult::AlwaysOverflows;
+  }
 
   return OverflowResult::MayOverflow;
 }
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2019,6 +2019,89 @@
     break;
   }
+  case Intrinsic::uadd_sat:
+  case Intrinsic::sadd_sat:
+    if (isa<Constant>(II->getArgOperand(0)) &&
+        !isa<Constant>(II->getArgOperand(1))) {
+      // Canonicalize constants into the RHS.
+      Value *LHS = II->getArgOperand(0);
+      II->setArgOperand(0, II->getArgOperand(1));
+      II->setArgOperand(1, LHS);
+      return II;
+    }
+    LLVM_FALLTHROUGH;
+  case Intrinsic::usub_sat:
+  case Intrinsic::ssub_sat: {
+    Value *Arg0 = II->getArgOperand(0);
+    Value *Arg1 = II->getArgOperand(1);
+    Intrinsic::ID IID = II->getIntrinsicID();
+
+    // Make use of known overflow information.
+    OverflowResult OR;
+    switch (IID) {
+    default: llvm_unreachable("Unexpected intrinsic!");
+    case Intrinsic::uadd_sat:
+      OR = computeOverflowForUnsignedAdd(Arg0, Arg1, II);
+      if (OR == OverflowResult::NeverOverflows)
+        return replaceInstUsesWith(*II, Builder.CreateNUWAdd(Arg0, Arg1));
+      if (OR == OverflowResult::AlwaysOverflows)
+        return replaceInstUsesWith(*II,
+                                   ConstantInt::getAllOnesValue(II->getType()));
+      break;
+    case Intrinsic::usub_sat:
+      OR = computeOverflowForUnsignedSub(Arg0, Arg1, II);
+      if (OR == OverflowResult::NeverOverflows)
+        return replaceInstUsesWith(*II, Builder.CreateNUWSub(Arg0, Arg1));
+      if (OR == OverflowResult::AlwaysOverflows)
+        return replaceInstUsesWith(*II,
+                                   ConstantInt::getNullValue(II->getType()));
+      break;
+    case Intrinsic::sadd_sat:
+      if (willNotOverflowSignedAdd(Arg0, Arg1, *II))
+        return replaceInstUsesWith(*II, Builder.CreateNSWAdd(Arg0, Arg1));
+      break;
+    case Intrinsic::ssub_sat:
+      if (willNotOverflowSignedSub(Arg0, Arg1, *II))
+        return replaceInstUsesWith(*II, Builder.CreateNSWSub(Arg0, Arg1));
+      break;
+    }
+
+    // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
+    // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
+    // if Val and Val2 have the same sign
+    if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
+      Value *X;
+      const APInt *Val, *Val2;
+      APInt NewVal;
+      bool IsUnsigned = IID == Intrinsic::uadd_sat
+                     || IID == Intrinsic::usub_sat;
+      if (Other->getIntrinsicID() == II->getIntrinsicID() &&
+          match(Arg1, m_APInt(Val)) &&
+          match(Other->getArgOperand(0), m_Value(X)) &&
+          match(Other->getArgOperand(1), m_APInt(Val2))) {
+        if (IsUnsigned)
+          NewVal = Val->uadd_sat(*Val2);
+        else if (Val->isNonNegative() == Val2->isNonNegative()) {
+          bool Overflow;
+          NewVal = Val->sadd_ov(*Val2, Overflow);
+          if (Overflow) {
+            // Both adds together may add more than SignedMaxValue
+            // without saturating the final result.
+            break;
+          }
+        } else {
+          // Cannot fold saturated addition with different signs.
+          break;
+        }
+
+        return replaceInstUsesWith(*II, Builder.CreateBinaryIntrinsic(
+            II->getIntrinsicID(), X,
+            ConstantInt::get(II->getType(), NewVal)));
+      }
+    }
+    break;
+  }
+
   case Intrinsic::minnum:
   case Intrinsic::maxnum:
   case Intrinsic::minimum:
Index: test/Transforms/InstCombine/saturating-add.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/saturating-add.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @dummy(i8)
+declare void @dummy_vec(<2 x i8>)
+declare i8 @llvm.uadd.sat.i8(i8, i8)
+declare i8 @llvm.sadd.sat.i8(i8, i8)
+declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
+declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
+
+; Folding of two saturating adds
+define void @test_add_scalar_combine(i8 %a) {
+; CHECK-LABEL: @test_add_scalar_combine(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT:    call void @dummy(i8 [[TMP1]])
+; CHECK-NEXT:    call void @dummy(i8 -1)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 30)
+; CHECK-NEXT:    call void @dummy(i8 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 -30)
+; CHECK-NEXT:    call void @dummy(i8 [[TMP3]])
+; CHECK-NEXT:    [[V1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 10)
+; CHECK-NEXT:    [[V2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[V1]], i8 -20)
+; CHECK-NEXT:    call void @dummy(i8 [[V2]])
+; CHECK-NEXT:    [[W1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A]], i8 100)
+; CHECK-NEXT:    [[W2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[W1]], i8 100)
+; CHECK-NEXT:    call void @dummy(i8 [[W2]])
+; CHECK-NEXT:    ret void
+;
+  %x1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 10)
+  %x2 = call i8 @llvm.uadd.sat.i8(i8 %x1, i8 20)
+  call void @dummy(i8 %x2)
+
+  %y1 = call i8 @llvm.uadd.sat.i8(i8 %a, i8 100)
+  %y2 = call i8 @llvm.uadd.sat.i8(i8 %y1, i8 200)
+  call void @dummy(i8 %y2)
+
+  %z1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 10)
+  %z2 = call i8 @llvm.sadd.sat.i8(i8 %z1, i8 20)
+  call void @dummy(i8 %z2)
+
+  %u1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 -10)
+  %u2 = call i8 @llvm.sadd.sat.i8(i8 %u1, i8 -20)
+  call void @dummy(i8 %u2)
+
+  %v1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 10)
+  %v2 = call i8 @llvm.sadd.sat.i8(i8 %v1, i8 -20)
+  call void @dummy(i8 %v2)
+
+  %w1 = call i8 @llvm.sadd.sat.i8(i8 %a, i8 100)
+  %w2 = call i8 @llvm.sadd.sat.i8(i8 %w1, i8 100)
+  call void @dummy(i8 %w2)
+
+  ret void
+}
+
+; Use of known overflow/no-overflow information
+define void @test_add_scalar_overflow(i8 %a) {
+; CHECK-LABEL: @test_add_scalar_overflow(
+; CHECK-NEXT:    [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and i8 [[A]], 127
+; CHECK-NEXT:    call void @dummy(i8 -1)
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[A_NNEG]], 10
+; CHECK-NEXT:    call void @dummy(i8 [[TMP1]])
+; CHECK-NEXT:    [[Y1:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A_NEG]], i8 -10)
+; CHECK-NEXT:    call void @dummy(i8 [[Y1]])
+; CHECK-NEXT:    [[Y2:%.*]] = call i8 @llvm.sadd.sat.i8(i8 [[A_NNEG]], i8 10)
+; CHECK-NEXT:    call void @dummy(i8 [[Y2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i8 [[A_NEG]], 10
+; CHECK-NEXT:    call void @dummy(i8 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i8 [[A_NNEG]], -10
+; CHECK-NEXT:    call void @dummy(i8 [[TMP3]])
+; CHECK-NEXT:    ret void
+;
+  %a_neg = or i8 %a, -128
+  %a_nneg = and i8 %a, 127
+
+  %x1 = call i8 @llvm.uadd.sat.i8(i8 %a_neg, i8 -10)
+  call void @dummy(i8 %x1)
+
+  %x2 = call i8 @llvm.uadd.sat.i8(i8 %a_nneg, i8 10)
+  call void @dummy(i8 %x2)
+
+  %y1 = call i8 @llvm.sadd.sat.i8(i8 %a_neg, i8 -10)
+  call void @dummy(i8 %y1)
+
+  %y2 = call i8 @llvm.sadd.sat.i8(i8 %a_nneg, i8 10)
+  call void @dummy(i8 %y2)
+
+  %y3 = call i8 @llvm.sadd.sat.i8(i8 %a_neg, i8 10)
+  call void @dummy(i8 %y3)
+
+  %y4 = call i8 @llvm.sadd.sat.i8(i8 %a_nneg, i8 -10)
+  call void @dummy(i8 %y4)
+
+  ret void
+}
+
+; Folding of two saturating vector adds -- only constant splats supported
+define void @test_add_vector_combine(<2 x i8> %a) {
+; CHECK-LABEL: @test_add_vector_combine(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP1]])
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> )
+; CHECK-NEXT:    [[X5:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    [[X6:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X5]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[X6]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP2]])
+; CHECK-NEXT:    [[Y3:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    [[Y4:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[Y3]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y4]])
+; CHECK-NEXT:    [[Y5:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    [[Y6:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[Y5]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y6]])
+; CHECK-NEXT:    ret void
+;
+  %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %x2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x1, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x2)
+
+  %x3 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %x4 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x3, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x4)
+
+  %x5 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %x6 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %x5, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x6)
+
+  %y1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %y2 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %y1, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y2)
+
+  %y3 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %y4 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %y3, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y4)
+
+  %y5 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %y6 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %y5, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y6)
+
+  ret void
+}
+
+; Known overflow information for vectors -- also works with non-splats
+define void @test_add_vector_overflow(<2 x i8> %a) {
+; CHECK-LABEL: @test_add_vector_overflow(
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]],
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> )
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <2 x i8> [[A_NNEG]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP1]])
+; CHECK-NEXT:    [[Y1:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y1]])
+; CHECK-NEXT:    [[Y2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NNEG]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i8> [[A_NEG]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i8> [[A_NNEG]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP3]])
+; CHECK-NEXT:    ret void
+;
+  %a_neg = or <2 x i8> %a,
+  %a_nneg = and <2 x i8> %a,
+
+  %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x1)
+
+  %x2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x2)
+
+  %y1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y1)
+
+  %y2 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y2)
+
+  %y3 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y3)
+
+  %y4 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y4)
+
+  ret void
+}
Index: test/Transforms/InstCombine/saturating-sub.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/saturating-sub.ll
@@ -0,0 +1,184 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+declare void @dummy(i8)
+declare void @dummy_vec(<2 x i8>)
+declare i8 @llvm.usub.sat.i8(i8, i8)
+declare i8 @llvm.ssub.sat.i8(i8, i8)
+declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
+declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
+
+; Folding of two saturating subs
+define void @test_sub_scalar_combine(i8 %a) {
+; CHECK-LABEL: @test_sub_scalar_combine(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 30)
+; CHECK-NEXT:    call void @dummy(i8 [[TMP1]])
+; CHECK-NEXT:    call void @dummy(i8 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A]], i8 30)
+; CHECK-NEXT:    call void @dummy(i8 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A]], i8 -30)
+; CHECK-NEXT:    call void @dummy(i8 [[TMP3]])
+; CHECK-NEXT:    [[V1:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A]], i8 10)
+; CHECK-NEXT:    [[V2:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[V1]], i8 -20)
+; CHECK-NEXT:    call void @dummy(i8 [[V2]])
+; CHECK-NEXT:    [[W1:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A]], i8 100)
+; CHECK-NEXT:    [[W2:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[W1]], i8 100)
+; CHECK-NEXT:    call void @dummy(i8 [[W2]])
+; CHECK-NEXT:    ret void
+;
+  %x1 = call i8 @llvm.usub.sat.i8(i8 %a, i8 10)
+  %x2 = call i8 @llvm.usub.sat.i8(i8 %x1, i8 20)
+  call void @dummy(i8 %x2)
+
+  %y1 = call i8 @llvm.usub.sat.i8(i8 %a, i8 100)
+  %y2 = call i8 @llvm.usub.sat.i8(i8 %y1, i8 200)
+  call void @dummy(i8 %y2)
+
+  %z1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 10)
+  %z2 = call i8 @llvm.ssub.sat.i8(i8 %z1, i8 20)
+  call void @dummy(i8 %z2)
+
+  %u1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 -10)
+  %u2 = call i8 @llvm.ssub.sat.i8(i8 %u1, i8 -20)
+  call void @dummy(i8 %u2)
+
+  %v1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 10)
+  %v2 = call i8 @llvm.ssub.sat.i8(i8 %v1, i8 -20)
+  call void @dummy(i8 %v2)
+
+  %w1 = call i8 @llvm.ssub.sat.i8(i8 %a, i8 100)
+  %w2 = call i8 @llvm.ssub.sat.i8(i8 %w1, i8 100)
+  call void @dummy(i8 %w2)
+
+  ret void
+}
+
+; Use of known overflow/no-overflow information
+define void @test4(i8 %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[A_NEG:%.*]] = or i8 [[A:%.*]], -128
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and i8 [[A]], 127
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[A_NEG]], -10
+; CHECK-NEXT:    call void @dummy(i8 [[TMP1]])
+; CHECK-NEXT:    call void @dummy(i8 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i8 [[A_NEG]], 10
+; CHECK-NEXT:    call void @dummy(i8 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw i8 [[A_NNEG]], -10
+; CHECK-NEXT:    call void @dummy(i8 [[TMP3]])
+; CHECK-NEXT:    [[Y3:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A_NEG]], i8 10)
+; CHECK-NEXT:    call void @dummy(i8 [[Y3]])
+; CHECK-NEXT:    [[Y4:%.*]] = call i8 @llvm.ssub.sat.i8(i8 [[A_NNEG]], i8 -10)
+; CHECK-NEXT:    call void @dummy(i8 [[Y4]])
+; CHECK-NEXT:    ret void
+;
+  %a_neg = or i8 %a, -128
+  %a_nneg = and i8 %a, 127
+
+  %x1 = call i8 @llvm.usub.sat.i8(i8 %a_neg, i8 10)
+  call void @dummy(i8 %x1)
+
+  %x2 = call i8 @llvm.usub.sat.i8(i8 %a_nneg, i8 -10)
+  call void @dummy(i8 %x2)
+
+  %y1 = call i8 @llvm.ssub.sat.i8(i8 %a_neg, i8 -10)
+  call void @dummy(i8 %y1)
+
+  %y2 = call i8 @llvm.ssub.sat.i8(i8 %a_nneg, i8 10)
+  call void @dummy(i8 %y2)
+
+  %y3 = call i8 @llvm.ssub.sat.i8(i8 %a_neg, i8 10)
+  call void @dummy(i8 %y3)
+
+  %y4 = call i8 @llvm.ssub.sat.i8(i8 %a_nneg, i8 -10)
+  call void @dummy(i8 %y4)
+
+  ret void
+}
+
+; Folding of two saturating vector subs -- only constant splats supported
+define void @test_add_vector_combine(<2 x i8> %a) {
+; CHECK-LABEL: @test_add_vector_combine(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP1]])
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> zeroinitializer)
+; CHECK-NEXT:    [[X5:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    [[X6:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[X5]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[X6]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP2]])
+; CHECK-NEXT:    [[Y3:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    [[Y4:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[Y3]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y4]])
+; CHECK-NEXT:    [[Y5:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A]], <2 x i8> )
+; CHECK-NEXT:    [[Y6:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[Y5]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y6]])
+; CHECK-NEXT:    ret void
+;
+  %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %x2 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x1, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x2)
+
+  %x3 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %x4 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x3, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x4)
+
+  %x5 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %x6 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %x5, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x6)
+
+  %y1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %y2 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %y1, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y2)
+
+  %y3 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %y4 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %y3, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y4)
+
+  %y5 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> )
+  %y6 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %y5, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y6)
+
+  ret void
+}
+
+; Known overflow information for vectors -- also works with non-splats
+define void @test_sub_vector_overflow(<2 x i8> %a) {
+; CHECK-LABEL: @test_sub_vector_overflow(
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]],
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A]],
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[A_NEG]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP1]])
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <2 x i8> [[A_NEG]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i8> [[A_NNEG]],
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[TMP3]])
+; CHECK-NEXT:    [[Y3:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y3]])
+; CHECK-NEXT:    [[Y4:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A_NNEG]], <2 x i8> )
+; CHECK-NEXT:    call void @dummy_vec(<2 x i8> [[Y4]])
+; CHECK-NEXT:    ret void
+;
+  %a_neg = or <2 x i8> %a,
+  %a_nneg = and <2 x i8> %a,
+
+  %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_neg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x1)
+
+  %x2 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %x2)
+
+  %y1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_neg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y1)
+
+  %y2 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y2)
+
+  %y3 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_neg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y3)
+
+  %y4 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a_nneg, <2 x i8> )
+  call void @dummy_vec(<2 x i8> %y4)
+
+  ret void
+}