Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -780,6 +780,26 @@
   }
 }
 
+// Compute known sign bit from a shift operator, based on the known sign bit of
+// its first operand and the nsw flag. KnownZero2 and KnownOne2 are the computed
+// known bits of its first operand.
+static void computeKnownSignBitFromShift(Operator *I,
+                                         APInt &KnownZero, APInt &KnownOne,
+                                         APInt &KnownZero2, APInt &KnownOne2) {
+  // If this is a left shift with nsw, then the shift produces a poison value
+  // if it shifts out any bits that disagree with the resultant sign bit.
+  // This means the result is either a poison value or has the same sign bit
+  // as the first operand.
+  unsigned BitWidth = KnownZero.getBitWidth();
+  if (I->getOpcode() == Instruction::Shl &&
+      cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
+    if (KnownZero2.isNegative())
+      KnownZero |= APInt::getSignBit(BitWidth);
+    else if (KnownOne2.isNegative())
+      KnownOne |= APInt::getSignBit(BitWidth);
+  }
+}
+
 // Compute known bits from a shift operator, including those with a
 // non-constant shift amount. KnownZero and KnownOne are the outputs of this
 // function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
@@ -798,9 +818,12 @@
 
   if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
     unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
 
-    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
-    KnownZero = KZF(KnownZero, ShiftAmt);
-    KnownOne = KOF(KnownOne, ShiftAmt);
+    computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q);
+    KnownZero = KZF(KnownZero2, ShiftAmt);
+    KnownOne = KOF(KnownOne2, ShiftAmt);
+
+    computeKnownSignBitFromShift(I, KnownZero, KnownOne, KnownZero2, KnownOne2);
+
     return;
   }
@@ -855,6 +878,8 @@
     KnownOne &= KOF(KnownOne2, ShiftAmt);
   }
 
+  computeKnownSignBitFromShift(I, KnownZero, KnownOne, KnownZero2, KnownOne2);
+
   // If there are no compatible shift amounts, then we've proven that the shift
   // amount must be >= the BitWidth, and the result is undefined. We could
   // return anything we'd like, but we need to make sure the sets of known bits
@@ -1214,7 +1239,9 @@
         unsigned Opcode = LU->getOpcode();
         // Check for operations that have the property that if
         // both their operands have low zero bits, the result
-        // will have low zero bits.
+        // will have low zero bits. Also check for operations
+        // that are known to produce non-negative or negative
+        // recurrence values.
         if (Opcode == Instruction::Add ||
             Opcode == Instruction::Sub ||
             Opcode == Instruction::And ||
@@ -1240,6 +1267,40 @@
           KnownZero = APInt::getLowBitsSet(BitWidth,
                                            std::min(KnownZero2.countTrailingOnes(),
                                                     KnownZero3.countTrailingOnes()));
+
+          auto OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
+          if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+            // If Initial value of recurrence is nonnegative, and we are adding
+            // a nonnegative number with nsw, the result can only be nonnegative
+            // or poison value regardless of the number of times we execute the
+            // add in phi recurrence. If initial value is negative and we are
+            // adding a negative number with nsw, the result can only be
+            // negative or poison value. Similar arguments apply to sub and mul.
+            //
+            // (add non-negative, non-negative) --> non-negative
+            // (add negative, negative) --> negative
+            if (Opcode == Instruction::Add) {
+              if (KnownZero2.isNegative() && KnownZero3.isNegative())
+                KnownZero |= APInt::getSignBit(BitWidth);
+              else if (KnownOne2.isNegative() && KnownOne3.isNegative())
+                KnownOne |= APInt::getSignBit(BitWidth);
+            }
+
+            // (sub nsw non-negative, negative) --> non-negative
+            // (sub nsw negative, non-negative) --> negative
+            else if (Opcode == Instruction::Sub && LL == I) {
+              if (KnownZero2.isNegative() && KnownOne3.isNegative())
+                KnownZero |= APInt::getSignBit(BitWidth);
+              else if (KnownOne2.isNegative() && KnownZero3.isNegative())
+                KnownOne |= APInt::getSignBit(BitWidth);
+            }
+
+            // (mul nsw non-negative, non-negative) --> non-negative
+            else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
+                     KnownZero3.isNegative())
+              KnownZero |= APInt::getSignBit(BitWidth);
+          }
+
           break;
         }
       }
Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -36,6 +36,7 @@
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
@@ -1281,7 +1282,8 @@
     }
   }
   // Our raison d'etre! Eliminate sign and zero extension.
-  if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+  if ((isa<SExtInst>(DU.NarrowUse) && (IsSigned || DU.NeverNegative)) ||
+      (isa<ZExtInst>(DU.NarrowUse) && (!IsSigned || DU.NeverNegative))) {
     Value *NewDef = DU.WideDef;
     if (DU.NarrowUse->getType() != WideType) {
       unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
@@ -1370,9 +1372,12 @@
 ///
 void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
   const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
+  // isKnownPredicate is enough for most cases but still need isKnownNonNegative
+  // here to work around conservatism in ScalarEvolution about no-wrap flags.
   bool NeverNegative = SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
-                                            SE->getConstant(NarrowSCEV->getType(), 0));
+                                            SE->getConstant(NarrowSCEV->getType(), 0)) ||
+                       isKnownNonNegative(NarrowDef, NarrowDef->getModule()->getDataLayout());
 
   for (User *U : NarrowDef->users()) {
     Instruction *NarrowUser = cast<Instruction>(U);
Index: test/Analysis/ValueTracking/known-non-negative.ll
===================================================================
--- test/Analysis/ValueTracking/known-non-negative.ll
+++ test/Analysis/ValueTracking/known-non-negative.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Induction variable is known to be non-negative
+define i32 @test_indvar_nonnegative_add() {
+; CHECK-LABEL: @test_indvar_nonnegative_add(
+; CHECK: br i1 true, label %for.end, label %for.body
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %inc = add nsw i32 %i, 1
+  %cmp = icmp sge i32 %i, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %i
+}
+
+define i32 @test_indvar_nonnegative_mul() {
+; CHECK-LABEL: @test_indvar_nonnegative_mul(
+; CHECK: br i1 true, label %for.end, label %for.body
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [1, %entry], [%inc, %for.body]
+  %inc = mul nsw i32 %i, 3
+  %cmp = icmp sge i32 %i, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %i
+}
+
+define i32 @test_indvar_nonnegative_sub(i32 %a) {
+; CHECK-LABEL: @test_indvar_nonnegative_sub(
+; CHECK: br i1 true, label %for.end, label %for.body
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%inc, %for.body]
+  %b = or i32 %a, -2147483648
+  %inc = sub nsw i32 %i, %b
+  %cmp = icmp sge i32 %i, 0
+  br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+  ret i32 %i
+}
+
+; Result of left shifting a non-negative integer
+; with nsw flag should also be non-negative
+define i1 @test_shift_nonnegative(i32 %a) {
+; CHECK-LABEL: @test_shift_nonnegative(
+; CHECK: ret i1 true
+  %b = lshr i32 %a, 2
+  %shift = shl nsw i32 %b, 3
+  %cmp = icmp sge i32 %shift, 0
+  ret i1 %cmp
+}
Index: test/Transforms/BBVectorize/loop1.ll
===================================================================
--- test/Transforms/BBVectorize/loop1.ll
+++ test/Transforms/BBVectorize/loop1.ll
@@ -83,7 +83,7 @@
 ; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
 ; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
 ; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
-; CHECK-UNRL: %indvars.iv.next.1 = add nsw i64 %indvars.iv, 2
+; CHECK-UNRL: %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv, 2
 ; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
 ; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
 ; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
Index: test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
===================================================================
--- test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
+++ test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "p:64:64:64-n32:64"
+
+; When widening IV and its users, trunc and zext/sext are not needed
+; if the original 32-bit user is known to be non-negative, whether
+; the IV is considered signed or unsigned.
+define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
+; CHECK-LABEL: @foo(
+; CHECK-NOT: zext
+; CHECK-NOT: sext
+entry:
+  %cmp1 = icmp slt i32 0, %N
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = sext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.02, 2
+  %idxprom1 = zext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %0, %1
+  %idxprom4 = zext i32 %i.02 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}
+
+define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
+; CHECK-LABEL: @foo1(
+; CHECK-NOT: zext
+; CHECK-NOT: sext
+entry:
+  %cmp1 = icmp slt i32 0, %N
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+  %idxprom = zext i32 %i.02 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
+  %0 = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.02, 2
+  %idxprom1 = sext i32 %add to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %0, %1
+  %idxprom4 = sext i32 %i.02 to i64
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
+  store i32 %add3, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.02, 1
+  %cmp = icmp slt i32 %inc, %N
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
+  ret void
+}