Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -155,8 +155,8 @@
 }
 
 /// EvaluateInDifferentType - Given an expression that
-/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
-/// insert the code to evaluate the expression.
+/// IsProfitableToEvaluateTruncated or CanEvaluateSExtd returns true for,
+/// actually insert the code to evaluate the expression.
 Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
                                              bool isSigned) {
   if (Constant *C = dyn_cast<Constant>(V)) {
@@ -221,9 +221,26 @@
   }
   default:
     // TODO: Can handle more cases here.
-    llvm_unreachable("Unreachable!");
+    // This is not a recognized instruction. We need an explicit cast here.
+    // Currently we can reach this only when truncating expression type.
+    assert(CastInst::castIsValid(Instruction::Trunc, I, Ty) &&
+           "can not insert explicit cast");
+    // hasOneUse property is guaranteed by "EstimateCostForTruncatedEvaluation"
+    assert(I->hasOneUse() &&
+           "can not truncate branching expression trees");
+
+    // Create truncate from "I" type to the Ty
+    Instruction *Trunc = CastInst::CreateTruncOrBitCast(I, Ty);
+    Trunc->setName(I->getName() + ".truncated");
+
+    // Insert new truncate after "I"
+    Instruction *NextI = I->getNextNode();
+    assert(NextI != I->getParent()->end() &&
+           "can not insert explicit truncate");
+    return InsertNewInstBefore(Trunc, *NextI);
   }
-
+  
+  assert(Res && "failed to evaluate instruction in different type");
   Res->takeName(I);
   return InsertNewInstWith(Res, *I);
 }
@@ -318,19 +335,16 @@
   return nullptr;
 }
 
-/// CanEvaluateTruncated - Return true if we can evaluate the specified
-/// expression tree as type Ty instead of its larger type, and arrive with the
-/// same value.  This is used by code that tries to eliminate truncates.
-///
-/// Ty will always be a type smaller than V.  We should return true if trunc(V)
-/// can be computed by computing V in the smaller type.  If V is an instruction,
-/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
-/// makes sense if x and y can be efficiently truncated.
-///
-/// This function works on both vectors and scalars.
-///
-static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC,
-                                 Instruction *CxtI) {
+/// EstimateCostForTruncatedEvaluation - Return true if we can evaluate the
+/// specified expression tree as type Ty instead of its larger type, and
+/// arrive at the same value. Also calculate cost of this transformation
+/// and store it into the "Cost" argument.
+/// Currently cost model is simple - increase cost by one for each additional
+/// instruction required, decrease cost by one for each instruction that
+/// will be removed after truncating expression tree.
+static bool EstimateCostForTruncatedEvaluation(int &Cost, Value *V,
+                                               Type *Ty, InstCombiner &IC,
+                                               Instruction *CxtI) {
   // We can always evaluate constants in another type.
   if (isa<Constant>(V))
     return true;
@@ -340,11 +354,17 @@
 
   Type *OrigTy = V->getType();
 
-  // If this is an extension from the dest type, we can eliminate it, even if it
-  // has multiple uses.
+  // If this is an extension from the dest type, we will not need any additional
+  // instructions to truncate it, even if it has multiple uses.
   if ((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
-      I->getOperand(0)->getType() == Ty)
+      I->getOperand(0)->getType() == Ty) {
+
+    // Add a cost bonus if we can eliminate this cast.
+    if (I->hasOneUse())
+      Cost -= 1;
+
     return true;
+  }
 
   // We can't extend or shrink something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
@@ -359,8 +379,10 @@
   case Instruction::Or:
   case Instruction::Xor:
     // These operators can all arbitrarily be extended or truncated.
-    return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
-           CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+    return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty, IC,
+                                              CxtI) &&
+           EstimateCostForTruncatedEvaluation(Cost, I->getOperand(1), Ty, IC,
+                                              CxtI);
 
   case Instruction::UDiv:
   case Instruction::URem: {
@@ -371,8 +393,10 @@
       APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth);
       if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
           IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
-        return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
-               CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
+        return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty,
+                                                  IC, CxtI) &&
+               EstimateCostForTruncatedEvaluation(Cost, I->getOperand(1), Ty,
+                                                  IC, CxtI);
       }
     }
     break;
@@ -383,7 +407,8 @@
     if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
       uint32_t BitWidth = Ty->getScalarSizeInBits();
       if (CI->getLimitedValue(BitWidth) < BitWidth)
-        return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+        return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty,
+                                                  IC, CxtI);
     }
     break;
   case Instruction::LShr:
@@ -396,7 +421,8 @@
       if (IC.MaskedValueIsZero(I->getOperand(0),
             APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) &&
           CI->getLimitedValue(BitWidth) < BitWidth) {
-        return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+        return EstimateCostForTruncatedEvaluation(Cost, I->getOperand(0), Ty,
+                                                  IC, CxtI);
       }
     }
     break;
@@ -410,8 +436,10 @@
     return true;
   case Instruction::Select: {
     SelectInst *SI = cast<SelectInst>(I);
-    return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
-           CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
+    return EstimateCostForTruncatedEvaluation(Cost, SI->getTrueValue(), Ty, IC,
+                                              CxtI) &&
+           EstimateCostForTruncatedEvaluation(Cost, SI->getFalseValue(), Ty, IC,
+                                              CxtI);
   }
   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
@@ -419,16 +447,46 @@
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
     for (Value *IncValue : PN->incoming_values())
-      if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI))
+      if (!EstimateCostForTruncatedEvaluation(Cost, IncValue, Ty, IC, CxtI))
         return false;
     return true;
   }
-  default:
-    // TODO: Can handle more cases here.
-    break;
+
+  // TODO: Can handle more cases here.
   }
 
-  return false;
+  // This is not a recognized instruction, but we can always truncate it by
+  // adding additional explicit trunc.
+  Cost += 1;
+  return true;
+}
+
+/// IsProfitableToEvaluateTruncated - Given the expression tree starting with
+/// truncate instruction return true if it would be profitable to remove
+/// this truncate and evaluate expression in a narrower type. This is used by
+/// code that tries to eliminate truncates.
+///
+/// This function works on both vectors and scalars.
+///
+static bool IsProfitableToEvaluateTruncated(TruncInst *Trunc,
+                                            InstCombiner &IC) {
+  Type *DestTy = Trunc->getType(), *SrcTy = Trunc->getOperand(0)->getType();
+
+  // Only truncate if the dest type is a simple type, don't convert the
+  // expression tree to something weird like i93 unless the source is also
+  // strange.
+  if (!DestTy->isVectorTy() && !IC.ShouldChangeType(SrcTy, DestTy))
+    return false;
+
+  int Cost = 0;
+  bool CanTruncate = EstimateCostForTruncatedEvaluation(
+      Cost, Trunc->getOperand(0), DestTy, IC, Trunc);
+
+  // Threshold picked in a way that it will allow only cases when number of
+  // removed instructions is strictly greater than number of the additional
+  // instructions required.
+  const int CostThreshold = 1;
+  return CanTruncate && Cost < CostThreshold;
 }
 
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
@@ -450,15 +508,11 @@
     return &CI;
 
   Value *Src = CI.getOperand(0);
-  Type *DestTy = CI.getType(), *SrcTy = Src->getType();
+  Type *DestTy = CI.getType();
 
   // Attempt to truncate the entire input expression tree to the destination
-  // type.   Only do this if the dest type is a simple type, don't convert the
-  // expression tree to something weird like i93 unless the source is also
-  // strange.
-  if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateTruncated(Src, DestTy, *this, &CI)) {
-
+  // type.
+  if (IsProfitableToEvaluateTruncated(&CI, *this)) {
     // If this cast is a truncate, evaluting in a different type always
     // eliminates the cast, so it is always a win.
     DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -340,8 +340,8 @@
   bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
                                  const unsigned SIOpd);
 
-private:
   bool ShouldChangeType(Type *From, Type *To) const;
+private:
   Value *dyn_castNegVal(Value *V) const;
   Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
   Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -118,3 +118,70 @@
 ; CHECK: and
 ; CHECK: ret
 }
+
+; Here by eliminating trunc we can get rid of a redundant zext. However
+; additional trunc should also be inserted. Check that we consider this
+; transform to be profitable.
+define void @test11(i8 *%res, i32* %t, i8* %s) {
+ entry:
+  %m = load i32, i32* %t
+  %n = load i8, i8* %s
+
+  %n.i32 = zext i8 %n to i32
+  %v = sub i32 %n.i32, %m
+  %v.i8 = trunc i32 %v to i8
+
+  store i8 %v.i8, i8* %res
+  ret void
+; CHECK-LABEL: @test11(
+; CHECK: %m = load i32, i32* %t
+; CHECK: %m.truncated = trunc i32 %m to i8
+; CHECK: %n = load i8, i8* %s
+; CHECK: %v = sub i8 %n, %m.truncated
+; CHECK: store i8 %v, i8* %res
+}
+
+; Here by eliminating trunc we will not remove any additional instructions.
+; Check that this case considered to be not profitable.
+define void @test12(i8 *%res, i32* %t, i32* %s) {
+ entry:
+  %m = load i32, i32* %t
+  %n = load i32, i32* %s
+
+  %v = sub i32 %n, %m
+  %v.i8 = trunc i32 %v to i8
+
+  store i8 %v.i8, i8* %res
+  ret void
+; CHECK-LABEL: @test12(
+; CHECK: %m = load i32, i32* %t
+; CHECK: %n = load i32, i32* %s
+; CHECK: %v = sub i32 %n, %m
+; CHECK: %v.i8 = trunc i32 %v to i8
+; CHECK: store i8 %v.i8, i8* %res
+}
+
+; Check that casts with multiple uses are considered not possible to remove.
+declare void @use.test13(i32)
+define void @test13(i8 *%res, i32* %t, i8* %s) {
+ entry:
+  %m = load i32, i32* %t
+  %n = load i8, i8* %s
+
+  %n.i32 = zext i8 %n to i32
+  %v = sub i32 %n.i32, %m
+  %v.i8 = trunc i32 %v to i8
+
+  store i8 %v.i8, i8* %res
+  call void @use.test13(i32 %n.i32)
+  ret void
+; CHECK-LABEL: @test13(
+; CHECK: %m = load i32, i32* %t
+; CHECK: %n = load i8, i8* %s
+; CHECK: %n.i32 = zext i8 %n to i32
+; CHECK: %v = sub i32 %n.i32, %m
+; CHECK: %v.i8 = trunc i32 %v to i8
+; CHECK: store i8 %v.i8, i8* %res
+; CHECK: call void @use.test13(i32 %n.i32)
+; CHECK: ret void
+}