Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -780,6 +780,26 @@
   }
 }
 
+// Compute known sign bit from a shift operator, based on the known sign bit of
+// its first operand and the nsw flag. KnownZero2 and KnownOne2 are the computed
+// known bits of its first operand.
+static void computeKnownSignBitFromShift(Operator *I,
+                                         APInt &KnownZero, APInt &KnownOne,
+                                         APInt &KnownZero2, APInt &KnownOne2) {
+  // If this is a left shift with nsw, then the shift produces a poison value
+  // if it shifts out any bits that disagree with the resultant sign bit.
+  // This means the result is either a poison value or has the same sign bit
+  // as the first operand.
+  unsigned BitWidth = KnownZero.getBitWidth();
+  if (I->getOpcode() == Instruction::Shl &&
+      cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
+    if (KnownZero2.isNegative())
+      KnownZero |= APInt::getSignBit(BitWidth);
+    else if (KnownOne2.isNegative())
+      KnownOne |= APInt::getSignBit(BitWidth);
+  }
+}
+
 // Compute known bits from a shift operator, including those with a
 // non-constant shift amount. KnownZero and KnownOne are the outputs of this
 // function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
@@ -798,9 +818,12 @@
 
   if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
     unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
 
-    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
-    KnownZero = KZF(KnownZero, ShiftAmt);
-    KnownOne = KOF(KnownOne, ShiftAmt);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+    KnownZero = KZF(KnownZero2, ShiftAmt);
+    KnownOne = KOF(KnownOne2, ShiftAmt);
+
+    computeKnownSignBitFromShift(I, KnownZero, KnownOne, KnownZero2, KnownOne2);
+
     return;
   }
@@ -855,6 +878,8 @@
     KnownOne &= KOF(KnownOne2, ShiftAmt);
   }
 
+  computeKnownSignBitFromShift(I, KnownZero, KnownOne, KnownZero2, KnownOne2);
+
   // If there are no compatible shift amounts, then we've proven that the shift
   // amount must be >= the BitWidth, and the result is undefined. We could
   // return anything we'd like, but we need to make sure the sets of known bits
@@ -1214,7 +1239,9 @@
         unsigned Opcode = LU->getOpcode();
         // Check for operations that have the property that if
         // both their operands have low zero bits, the result
-        // will have low zero bits.
+        // will have low zero bits. Also check for operations
+        // that are known to produce non-negative or negative
+        // recurrence values.
         if (Opcode == Instruction::Add ||
             Opcode == Instruction::Sub ||
             Opcode == Instruction::And ||
@@ -1240,6 +1267,40 @@
           KnownZero = APInt::getLowBitsSet(BitWidth,
                                            std::min(KnownZero2.countTrailingOnes(),
                                                     KnownZero3.countTrailingOnes()));
+
+          auto OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
+          if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+            // If Initial value of recurrence is nonnegative, and we are adding
+            // a nonnegative number with nsw, the result can only be nonnegative
+            // or poison value regardless of the number of times we execute the
+            // add in phi recurrence. If initial value is negative and we are
+            // adding a negative number with nsw, the result can only be
+            // negative or poison value. Similar arguments apply to sub and mul.
+            //
+            // (add non-negative, non-negative) --> non-negative
+            // (add negative, negative) --> negative
+            if (Opcode == Instruction::Add) {
+              if (KnownZero2.isNegative() && KnownZero3.isNegative())
+                KnownZero |= APInt::getSignBit(BitWidth);
+              else if (KnownOne2.isNegative() && KnownOne3.isNegative())
+                KnownOne |= APInt::getSignBit(BitWidth);
+            }
+
+            // (sub nsw non-negative, negative) --> non-negative
+            // (sub nsw negative, non-negative) --> negative
+            else if (Opcode == Instruction::Sub && LL == I) {
+              if (KnownZero2.isNegative() && KnownOne3.isNegative())
+                KnownZero |= APInt::getSignBit(BitWidth);
+              else if (KnownOne2.isNegative() && KnownZero3.isNegative())
+                KnownOne |= APInt::getSignBit(BitWidth);
+            }
+
+            // (mul nsw non-negative, non-negative) --> non-negative
+            else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
+                     KnownZero3.isNegative())
+              KnownZero |= APInt::getSignBit(BitWidth);
+          }
+
           break;
         }
       }
Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -1281,7 +1282,8 @@
     }
   }
   // Our raison d'etre! Eliminate sign and zero extension.
-  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+  if ((isa<SExtInst>(DU.NarrowUse) && (IsSigned || DU.NeverNegative)) ||
+      (isa<ZExtInst>(DU.NarrowUse) && (!IsSigned || DU.NeverNegative))) {
     Value *NewDef = DU.WideDef;
     if (DU.NarrowUse->getType() != WideType) {
       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
@@ -1370,9 +1372,12 @@
 ///
 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+  // isKnownPredicate is enough for most cases but still need isKnownNonNegative
+  // here to work around conservatism in ScalarEvolution about no-wrap flags.
   bool NeverNegative = SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
-                                            SE->getConstant(NarrowSCEV->getType(), 0));
+                                            SE->getConstant(NarrowSCEV->getType(), 0)) ||
+                       isKnownNonNegative(NarrowDef, NarrowDef->getModule()->getDataLayout());
 
   for (User *U : NarrowDef->users()) {
     Instruction *NarrowUser = cast<Instruction>(U);
Index: test/Analysis/ValueTracking/known-non-negative.ll
===================================================================
--- test/Analysis/ValueTracking/known-non-negative.ll
+++ test/Analysis/ValueTracking/known-non-negative.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Induction variable is known to be non-negative
+define i32 @test_indvar_nonnegative_add() {
+; CHECK-LABEL: @test_indvar_nonnegative_add(
+; CHECK: br i1 true, label %for.end, label %for.body
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp sge i32 %i, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %i
+}
+
+define i32 @test_indvar_nonnegative_mul() {
+; CHECK-LABEL: @test_indvar_nonnegative_mul(
+; CHECK: br i1 true, label %for.end, label %for.body
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [1, %entry], [%inc, %for.body]
+  %inc = mul nsw i32 %i, 3
+  %cmp = icmp sge i32 %i, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %i
+}
+
+define i32 @test_indvar_nonnegative_sub(i32 %a) {
+; CHECK-LABEL: @test_indvar_nonnegative_sub(
+; CHECK: br i1 true, label %for.end, label %for.body
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %b = or i32 %a, -2147483648
+  %inc = sub nsw i32 %i, %b
+  %cmp = icmp sge i32 %i, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %i
+}
+
+; Result of left shifting a non-negative integer
+; with nsw flag should also be non-negative
+define i1 @test_shift_nonnegative(i32 %a) {
+; CHECK-LABEL: @test_shift_nonnegative(
+; CHECK: ret i1 true
+  %b = lshr i32 %a, 2
+  %shift = shl nsw i32 %b, 3
+  %cmp = icmp sge i32 %shift, 0
+  ret i1 %cmp
+}
Index: test/Transforms/BBVectorize/loop1.ll
===================================================================
--- test/Transforms/BBVectorize/loop1.ll
+++ test/Transforms/BBVectorize/loop1.ll
@@ -83,7 +83,7 @@
 ; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
 ; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
 ; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
-; CHECK-UNRL: %indvars.iv.next.1 = add nsw i64 %indvars.iv, 2
+; CHECK-UNRL: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
 ; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
 ; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
 ; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
Index: test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
===================================================================
--- test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
+++ test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "p:64:64:64-n32:64"
+
+; When widening IV and its users, trunc and zext/sext are not needed
+; if the original 32-bit user is known to be non-negative, whether
+; the IV is considered signed or unsigned.
+define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: zext
+; CHECK-NOT: sext
+entry:
+  %cmp1 = icmp slt i32 0, %N
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.02, 2
+  %idxprom1 = zext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %0, %1
+  %idxprom4 = zext i32 %i.02 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
+; CHECK-LABEL: @foo1(
+; CHECK-NOT: zext
+; CHECK-NOT: sext
+entry:
+  %cmp1 = icmp slt i32 0, %N
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = zext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.02, 2
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %0, %1
+  %idxprom4 = sext i32 %i.02 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}