Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ llvm/lib/Analysis/ValueTracking.cpp @@ -4084,16 +4084,25 @@ return ConstantRange(Known.One, ~Known.Zero + 1); } +/// Combine constant ranges from computeConstantRange() and computeKnownBits(). +static ConstantRange computeConstantRangeIncludingKnownBits( + const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, + const Instruction *CxtI, const DominatorTree *DT, + OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) { + KnownBits Known = computeKnownBits( + V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo); + ConstantRange CR = computeConstantRange(V, UseInstrInfo); + return CR.intersectWith(constantRangeFromKnownBits(Known)); +} + OverflowResult llvm::computeOverflowForUnsignedAdd( const Value *LHS, const Value *RHS, const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT, - nullptr, UseInstrInfo); - ConstantRange LHSRange = constantRangeFromKnownBits(LHSKnown); - ConstantRange RHSRange = constantRangeFromKnownBits(RHSKnown); + ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( + LHS, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, UseInstrInfo); + ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( + RHS, DL, /*Depth=*/0, AC, CxtI, DT, nullptr, UseInstrInfo); return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange)); } @@ -4206,10 +4215,10 @@ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { - KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT); - KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT); - ConstantRange LHSRange = 
constantRangeFromKnownBits(LHSKnown); - ConstantRange RHSRange = constantRangeFromKnownBits(RHSKnown); + ConstantRange LHSRange = computeConstantRangeIncludingKnownBits( + LHS, DL, /*Depth=*/0, AC, CxtI, DT); + ConstantRange RHSRange = computeConstantRangeIncludingKnownBits( + RHS, DL, /*Depth=*/0, AC, CxtI, DT); return mapOverflowResult(LHSRange.unsignedSubMayOverflow(RHSRange)); } @@ -5697,10 +5706,36 @@ } ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { - assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction"); + Type *Ty = V->getType(); + assert(Ty->isIntOrIntVectorTy() && "Expected integer instruction"); + + const APInt *C; + if (match(V, m_APInt(C))) + return ConstantRange(*C); + + unsigned BitWidth = Ty->getScalarSizeInBits(); + if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { + // For constant vectors we compute the range between the minimal and + // maximal vector element. We can do this either using an unsigned or a + // signed interpretation: both would produce legal ranges, but one may be + // more useful than the other depending on the operation it is used in. + // For now we're making the arbitrary choice to use an unsigned range. 
+ APInt UnsignedMin, UnsignedMax; + UnsignedMin = UnsignedMax = CDV->getElementAsAPInt(0); + for (unsigned i = 1, e = CDV->getNumElements(); i != e; ++i) { + APInt Elt = CDV->getElementAsAPInt(i); + if (Elt.ult(UnsignedMin)) + UnsignedMin = Elt; + if (Elt.ugt(UnsignedMax)) + UnsignedMax = Elt; + } + + if (UnsignedMin == UnsignedMax + 1) + return ConstantRange(BitWidth, /* full */ true); + return ConstantRange(UnsignedMin, UnsignedMax + 1); + } InstrInfoQuery IIQ(UseInstrInfo); - unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt Lower = APInt(BitWidth, 0); APInt Upper = APInt(BitWidth, 0); if (auto *BO = dyn_cast<BinaryOperator>(V)) Index: llvm/test/Transforms/InstCombine/and2.ll =================================================================== --- llvm/test/Transforms/InstCombine/and2.ll +++ llvm/test/Transforms/InstCombine/and2.ll @@ -154,7 +154,7 @@ ; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_multiuse( ; CHECK-NEXT: [[SH:%.*]] = lshr i8 1, %x ; CHECK-NEXT: [[AND:%.*]] = and i8 [[SH]], 1 -; CHECK-NEXT: [[ADD:%.*]] = add i8 [[SH]], [[AND]] +; CHECK-NEXT: [[ADD:%.*]] = add nuw i8 [[SH]], [[AND]] ; CHECK-NEXT: ret i8 [[ADD]] ; %sh = lshr i8 1, %x Index: llvm/test/Transforms/InstCombine/saturating-add-sub.ll =================================================================== --- llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -355,7 +355,7 @@ define i8 @test_scalar_uadd_urem_no_ov(i8 %a) { ; CHECK-LABEL: @test_scalar_uadd_urem_no_ov( ; CHECK-NEXT: [[B:%.*]] = urem i8 [[A:%.*]], 100 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[B]], i8 -100) +; CHECK-NEXT: [[R:%.*]] = add nuw nsw i8 [[B]], -100 ; CHECK-NEXT: ret i8 [[R]] ; %b = urem i8 %a, 100 @@ -379,7 +379,7 @@ ; CHECK-LABEL: @test_scalar_uadd_urem_known_bits( ; CHECK-NEXT: [[AA:%.*]] = udiv i8 -66, [[A:%.*]] ; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 63 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[AA]], i8 [[BB]]) +; 
CHECK-NEXT: [[R:%.*]] = add nuw i8 [[AA]], [[BB]] ; CHECK-NEXT: ret i8 [[R]] ; %aa = udiv i8 190, %a @@ -768,8 +768,7 @@ define i8 @test_scalar_usub_add_nuw_no_ov(i8 %a) { ; CHECK-LABEL: @test_scalar_usub_add_nuw_no_ov( -; CHECK-NEXT: [[B:%.*]] = add nuw i8 [[A:%.*]], 10 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 9) +; CHECK-NEXT: [[R:%.*]] = add i8 [[A:%.*]], 1 ; CHECK-NEXT: ret i8 [[R]] ; %b = add nuw i8 %a, 10 @@ -779,9 +778,7 @@ define i8 @test_scalar_usub_add_nuw_eq(i8 %a) { ; CHECK-LABEL: @test_scalar_usub_add_nuw_eq( -; CHECK-NEXT: [[B:%.*]] = add nuw i8 [[A:%.*]], 10 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 10) -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 [[A:%.*]] ; %b = add nuw i8 %a, 10 %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 10) @@ -801,9 +798,7 @@ define i8 @test_scalar_usub_urem_must_ov(i8 %a) { ; CHECK-LABEL: @test_scalar_usub_urem_must_ov( -; CHECK-NEXT: [[B:%.*]] = urem i8 [[A:%.*]], 10 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[B]], i8 10) -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %b = urem i8 %a, 10 %r = call i8 @llvm.usub.sat.i8(i8 %b, i8 10) @@ -828,7 +823,7 @@ ; CHECK-LABEL: @test_scalar_usub_add_nuw_known_bits( ; CHECK-NEXT: [[AA:%.*]] = add nuw i8 [[A:%.*]], 10 ; CHECK-NEXT: [[BB:%.*]] = and i8 [[B:%.*]], 7 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[AA]], i8 [[BB]]) +; CHECK-NEXT: [[R:%.*]] = sub nuw i8 [[AA]], [[BB]] ; CHECK-NEXT: ret i8 [[R]] ; %aa = add nuw i8 %a, 10 @@ -840,7 +835,7 @@ define i8 @test_scalar_usub_add_nuw_inferred(i8 %a) { ; CHECK-LABEL: @test_scalar_usub_add_nuw_inferred( ; CHECK-NEXT: [[B:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[A:%.*]], i8 10) -; CHECK-NEXT: [[R:%.*]] = add i8 [[B]], 9 +; CHECK-NEXT: [[R:%.*]] = add nuw i8 [[B]], 9 ; CHECK-NEXT: ret i8 [[R]] ; %b = call i8 @llvm.usub.sat.i8(i8 %a, i8 10) @@ -850,8 +845,7 @@ define <2 x i8> @test_vector_usub_add_nuw_no_ov(<2 x i8> %a) { ; CHECK-LABEL: 
@test_vector_usub_add_nuw_no_ov( -; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, @@ -862,8 +856,7 @@ ; Can be optimized if the usub.sat RHS constant range handles non-splat vectors. define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1(<2 x i8> %a) { ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat1( -; CHECK-NEXT: [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], -; CHECK-NEXT: [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> ) +; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[A:%.*]], ; CHECK-NEXT: ret <2 x i8> [[R]] ; %b = add nuw <2 x i8> %a, Index: llvm/test/Transforms/LoopVectorize/X86/small-size.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/small-size.ll +++ llvm/test/Transforms/LoopVectorize/X86/small-size.ll @@ -81,7 +81,7 @@ ; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP2]], 3 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]