Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1697,6 +1697,9 @@
     return SelectInst::Create(Cmp, Neg, A);
   }
 
+  if (Instruction *Ext = narrowMathIfNoOverflow(I))
+    return Ext;
+
   bool Changed = false;
   if (!I.hasNoSignedWrap() && willNotOverflowSignedSub(Op0, Op1, I)) {
     Changed = true;
Index: llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1451,29 +1451,40 @@
 /// sure the narrow op does not overflow.
 Instruction *InstCombiner::narrowMathIfNoOverflow(BinaryOperator &BO) {
   // We need at least one extended operand.
-  Value *LHS = BO.getOperand(0), *RHS = BO.getOperand(1);
+  Value *Op0 = BO.getOperand(0), *Op1 = BO.getOperand(1);
+
+  // If this is a sub, we swap the operands since we always want an extension
+  // on the RHS. The LHS can be an extension or a constant.
+  if (BO.getOpcode() == Instruction::Sub)
+    std::swap(Op0, Op1);
+
   Value *X;
-  bool IsSext = match(LHS, m_SExt(m_Value(X)));
-  if (!IsSext && !match(LHS, m_ZExt(m_Value(X))))
+  bool IsSext = match(Op0, m_SExt(m_Value(X)));
+  if (!IsSext && !match(Op0, m_ZExt(m_Value(X))))
     return nullptr;
 
   // If both operands are the same extension from the same source type and we
   // can eliminate at least one (hasOneUse), this might work.
   CastInst::CastOps CastOpc = IsSext ? Instruction::SExt : Instruction::ZExt;
   Value *Y;
-  if (!(match(RHS, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
-        cast<Instruction>(RHS)->getOpcode() == CastOpc &&
-        (LHS->hasOneUse() || RHS->hasOneUse()))) {
+  if (!(match(Op1, m_ZExtOrSExt(m_Value(Y))) && X->getType() == Y->getType() &&
+        cast<Instruction>(Op1)->getOpcode() == CastOpc &&
+        (Op0->hasOneUse() || Op1->hasOneUse()))) {
     // If that did not match, see if we have a suitable constant operand.
     // Truncating and extending must produce the same constant.
     Constant *WideC;
-    if (!LHS->hasOneUse() || !match(RHS, m_Constant(WideC)))
+    if (!Op0->hasOneUse() || !match(Op1, m_Constant(WideC)))
      return nullptr;
     Constant *NarrowC = ConstantExpr::getTrunc(WideC, X->getType());
     if (ConstantExpr::getCast(CastOpc, NarrowC, BO.getType()) != WideC)
       return nullptr;
     Y = NarrowC;
   }
+
+  // Swap back now that we found our operands.
+  if (BO.getOpcode() == Instruction::Sub)
+    std::swap(X, Y);
+
   // Both operands have narrow versions. Last step: the math must not overflow
   // in the narrow width.
   if (!willNotOverflow(BO.getOpcode(), X, Y, BO, IsSext))
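The net effect of the InstructionCombining.cpp change is that a wide sub whose RHS is a sign/zero extension (and whose LHS is a matching extension or a constant that survives truncation) can now be rewritten as a narrow sub followed by a single extension, as long as willNotOverflow() proves the narrow subtraction cannot wrap. A minimal before/after sketch in IR, mirroring test13 below (the value names are illustrative, and the !range metadata is assumed to be what proves no 32-bit overflow):

  ; before: the subtract is done in 64 bits
  %a = call i32 @callee(), !range !2     ; value in [-512, -255)
  %b = call i32 @callee(), !range !3     ; value in [-128, 0)
  %sa = sext i32 %a to i64
  %sb = sext i32 %b to i64
  %sub = sub i64 %sa, %sb

  ; after narrowMathIfNoOverflow: the subtract is done in 32 bits, one extension remains
  %a = call i32 @callee(), !range !2
  %b = call i32 @callee(), !range !3
  %narrow = sub nsw i32 %a, %b
  %sub = sext i32 %narrow to i64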
Index: llvm/trunk/test/Transforms/InstCombine/narrow-math.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/narrow-math.ll
+++ llvm/trunk/test/Transforms/InstCombine/narrow-math.ll
@@ -491,5 +491,140 @@
   ret i64 %add
 }
 
+define i64 @test13(i32 %V) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !2
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !3
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT:    [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !2
+  %call2 = call i32 @callee(), !range !3
+  %sext1 = sext i32 %call1 to i64
+  %sext2 = sext i32 %call2 to i64
+  %sub = sub i64 %sext1, %sext2
+  ret i64 %sub
+}
+
+define i64 @test14(i32 %V) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !2
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nuw nsw i32 [[CALL1]], [[CALL2]]
+; CHECK-NEXT:    [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !2
+  %call2 = call i32 @callee(), !range !0
+  %zext1 = zext i32 %call1 to i64
+  %zext2 = zext i32 %call2 to i64
+  %sub = sub i64 %zext1, %zext2
+  ret i64 %sub
+}
+
+define i64 @test15(i32 %V) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nsw i32 8, [[ASHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = sext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %ashr = ashr i32 %V, 1
+  %sext = sext i32 %ashr to i64
+  %sub = sub i64 8, %sext
+  ret i64 %sub
+}
+
+define <2 x i64> @test15vec(<2 x i32> %V) {
+; CHECK-LABEL: @test15vec(
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nsw <2 x i32> <i32 8, i32 8>, [[ASHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = sext <2 x i32> [[SUBCONV]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[SUB]]
+;
+  %ashr = ashr <2 x i32> %V, <i32 1, i32 1>
+  %sext = sext <2 x i32> %ashr to <2 x i64>
+  %sub = sub <2 x i64> <i64 8, i64 8>, %sext
+  ret <2 x i64> %sub
+}
+
+define i64 @test16(i32 %V) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr i32 [[V:%.*]], 1
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nuw i32 -2, [[LSHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = zext i32 [[SUBCONV]] to i64
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %lshr = lshr i32 %V, 1
+  %zext = zext i32 %lshr to i64
+  %sub = sub i64 4294967294, %zext
+  ret i64 %sub
+}
+
+define <2 x i64> @test16vec(<2 x i32> %V) {
+; CHECK-LABEL: @test16vec(
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[SUBCONV:%.*]] = sub nuw <2 x i32> <i32 -2, i32 -2>, [[LSHR]]
+; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i32> [[SUBCONV]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[SUB]]
+;
+  %lshr = lshr <2 x i32> %V, <i32 1, i32 1>
+  %zext = zext <2 x i32> %lshr to <2 x i64>
+  %sub = sub <2 x i64> <i64 4294967294, i64 4294967294>, %zext
+  ret <2 x i64> %sub
+}
+
+; Negative test. Both have the same range so we can't guarantee the subtract
+; won't wrap.
+define i64 @test17(i32 %V) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[SEXT1:%.*]] = zext i32 [[CALL1]] to i64
+; CHECK-NEXT:    [[SEXT2:%.*]] = zext i32 [[CALL2]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i64 [[SEXT1]], [[SEXT2]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !0
+  %call2 = call i32 @callee(), !range !0
+  %sext1 = zext i32 %call1 to i64
+  %sext2 = zext i32 %call2 to i64
+  %sub = sub i64 %sext1, %sext2
+  ret i64 %sub
+}
+
+; Negative test. LHS is large positive 32-bit number. Range of callee can
+; cause overflow.
+define i64 @test18(i32 %V) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !1
+; CHECK-NEXT:    [[SEXT1:%.*]] = sext i32 [[CALL1]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i64 2147481648, [[SEXT1]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !1
+  %sext1 = sext i32 %call1 to i64
+  %sub = sub i64 2147481648, %sext1
+  ret i64 %sub
+}
+
+; Negative test. LHS is large negative 32-bit number. Range of callee can
+; cause overflow.
+define i64 @test19(i32 %V) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT:    [[CALL1:%.*]] = call i32 @callee(), !range !0
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[CALL1]] to i64
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i64 -2147481648, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[SUB]]
+;
+  %call1 = call i32 @callee(), !range !0
+  %sext1 = sext i32 %call1 to i64
+  %sub = sub i64 -2147481648, %sext1
+  ret i64 %sub
+}
+
 !0 = !{ i32 0, i32 2000 }
 !1 = !{ i32 -2000, i32 0 }
+!2 = !{ i32 -512, i32 -255 }
+!3 = !{ i32 -128, i32 0 }
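As a worked check of why test18 is a negative test (this is just the arithmetic, not part of the patch): the !range !1 value lies in [-2000, 0), so the wide result can reach 2147481648 - (-2000) = 2147483648, one past INT32_MAX (2147483647). A 32-bit sub could therefore wrap, and the i64 subtract is left untouched. Annotated:

  %call1 = call i32 @callee(), !range !1   ; value in [-2000, 0)
  %sext1 = sext i32 %call1 to i64
  %sub = sub i64 2147481648, %sext1        ; worst case 2147481648 + 2000 = 2147483648 > INT32_MAX,
                                           ; so narrowing to i32 is not safe here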