Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -2196,6 +2196,16 @@
     if (Tmp == 1) return 1;  // Early out.
     return std::min(Tmp, Tmp2)-1;
 
+  case Instruction::Mul: {
+    // The output of the Mul can be at most twice the valid bits in the inputs.
+    Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
+    if (Tmp2 == 1) return 1;
+    unsigned OutValidBits = (TyBits - Tmp + 1) + (TyBits - Tmp2 + 1);
+    return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
+  }
+
   case Instruction::PHI: {
     const PHINode *PN = cast<PHINode>(U);
     unsigned NumIncomingValues = PN->getNumIncomingValues();
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -369,6 +369,21 @@
       }
     }
     break;
+  case Instruction::AShr:
+    // If this is a truncate of an arithmetic shr, we can truncate it to a
+    // smaller ashr iff we know that all the bits from the sign bit of the
+    // original type and the sign bit of the truncate type are similar.
+    // TODO: It is enough to check that the bits we would be shifting in are
+    //       similar to the sign bit of the truncate type.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+      uint32_t BitWidth = Ty->getScalarSizeInBits();
+      if (CI->getLimitedValue(BitWidth) < BitWidth &&
+          OrigBitWidth - BitWidth <
+              IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
+        return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI);
+    }
+    break;
   case Instruction::Trunc:
     // trunc(trunc(x)) -> trunc(x)
     return true;
Index: lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -560,7 +560,9 @@
     }
 
     // (X >>u C) << C --> X & (-1 << C)
-    if (match(Op0, m_LShr(m_Value(X), m_Specific(Op1)))) {
+    // (X >>s C) << C --> X & (-1 << C)
+    if (match(Op0, m_LShr(m_Value(X), m_Specific(Op1))) ||
+        match(Op0, m_AShr(m_Value(X), m_Specific(Op1)))) {
       APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
     }
Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -465,7 +465,7 @@
   case Instruction::LShr: {
     const APInt *SA;
     if (match(I->getOperand(1), m_APInt(SA))) {
-      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+      uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
 
       // Unsigned shift right.
       APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
@@ -521,9 +521,12 @@
       if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1))
         return I;
 
+      unsigned SignBits = ComputeNumSignBits(I->getOperand(0), 0, CxtI);
+
       assert(!Known.hasConflict() && "Bits known to be one AND zero?");
       // Compute the new bits that are at the top now.
-      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+      APInt HighBits(APInt::getHighBitsSet(
+          BitWidth, std::min(SignBits + ShiftAmt - 1, BitWidth)));
       Known.Zero.lshrInPlace(ShiftAmt);
       Known.One.lshrInPlace(ShiftAmt);
 
Index: test/Transforms/InstCombine/shift.ll
===================================================================
--- test/Transforms/InstCombine/shift.ll
+++ test/Transforms/InstCombine/shift.ll
@@ -221,6 +221,19 @@
   ret i32 %C
 }
 
+;; ((A | 0xC0000000) >> 8) << 8 === (A & 0x3FFFFF00) | 0xC0000000
+define i32 @test12a(i32 %A) {
+; CHECK-LABEL: @test12a(
+; CHECK-NEXT:    [[a:%.*]] = and i32 %A, 1073741568
+; CHECK-NEXT:    [[C:%.*]] = or i32 [[a]], -1073741824
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %a = or i32 %A, -1073741824
+  %B = ashr i32 %a, 8
+  %C = shl i32 %B, 8
+  ret i32 %C
+}
+
 ;; This transformation is deferred to DAGCombine:
 ;; (A >> 3) << 4 === (A & -8) * 2
 ;; The shl may be valuable to scalar evolution.
Index: test/Transforms/InstCombine/trunc.ll
===================================================================
--- test/Transforms/InstCombine/trunc.ll
+++ test/Transforms/InstCombine/trunc.ll
@@ -77,6 +77,17 @@
   ret i32 %D
 }
 
+define i32 @test5_ashr(i32 %A) {
+; CHECK-LABEL: @test5_ashr(
+; CHECK-NEXT:    [[C:%.*]] = ashr i32 %A, 16
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %B = sext i32 %A to i128
+  %C = ashr i128 %B, 16
+  %D = trunc i128 %C to i32
+  ret i32 %D
+}
+
 define i32 @test6(i64 %A) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:    [[C:%.*]] = lshr i64 %A, 32
@@ -89,6 +100,44 @@
   ret i32 %D
 }
 
+define i32 @test6_lshr(i64 %A) {
+; CHECK-LABEL: @test6_lshr(
+; CHECK-NEXT:    [[C:%.*]] = lshr i64 %A, 32
+; CHECK-NEXT:    [[D:%.*]] = trunc i64 [[C]] to i32
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %B = sext i64 %A to i128
+  %C = ashr i128 %B, 32
+  %D = trunc i128 %C to i32
+  ret i32 %D
+}
+
+define i64 @test6_ashr(i64 %A) {
+; CHECK-LABEL: @test6_ashr(
+; CHECK-NEXT:    [[C:%.*]] = ashr i64 %A, 33
+; CHECK-NEXT:    ret i64 [[C]]
+;
+  %B = sext i64 %A to i128
+  %C = ashr i128 %B, 33
+  %D = trunc i128 %C to i64
+  ret i64 %D
+}
+
+define i16 @test6_ashr_mul(i8 %X, i8 %Y) {
+; CHECK-LABEL: @test6_ashr_mul(
+; CHECK-NEXT:    [[A:%.*]] = sext i8 %X to i16
+; CHECK-NEXT:    [[B:%.*]] = sext i8 %Y to i16
+; CHECK-NEXT:    [[C:%.*]] = mul nsw i16 [[A]], [[B]]
+; CHECK-NEXT:    [[D:%.*]] = ashr i16 %C, 15
+; CHECK-NEXT:    ret i16 %D
+  %A = sext i8 %X to i32
+  %B = sext i8 %Y to i32
+  %C = mul i32 %A, %B
+  %D = ashr i32 %C, 15
+  %E = trunc i32 %D to i16
+  ret i16 %E
+}
+
 define i92 @test7(i64 %A) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 %A, 32
@@ -101,6 +150,18 @@
   ret i92 %D
 }
 
+define i92 @test7_ashr(i64 %A) {
+; CHECK-LABEL: @test7_ashr(
+; CHECK-NEXT:    [[B:%.*]] = sext i64 %A to i92
+; CHECK-NEXT:    [[C:%.*]] = ashr i92 [[B]], 32
+; CHECK-NEXT:    ret i92 [[C]]
+;
+  %B = sext i64 %A to i128
+  %C = ashr i128 %B, 32
+  %D = trunc i128 %C to i92
+  ret i92 %D
+}
+
 define i64 @test8(i32 %A, i32 %B) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[TMP38:%.*]] = zext i32 %A to i64
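Not part of the patch: below is a minimal standalone C++ sketch of the sign-bit arithmetic the new Instruction::Mul case in ComputeNumSignBits performs, using the patch's own formula. The concrete numbers mirror @test6_ashr_mul above: two i8 values sign-extended to i32 carry 25 sign bits each, so their product still has 17 known sign bits, which is what allows the ashr/trunc pair to be evaluated in i16. The helper name numSignBitsOfMul is made up for illustration and is not an LLVM API.

#include <cstdio>

// Mirrors the new Instruction::Mul case: each operand contributes
// TyBits - NumSignBits + 1 "valid" (non-sign) bits, the product needs at
// most the sum of the two, and everything above that is a known sign bit.
unsigned numSignBitsOfMul(unsigned TyBits, unsigned Tmp, unsigned Tmp2) {
  if (Tmp == 1 || Tmp2 == 1)
    return 1; // Early out, as in the patch.
  unsigned OutValidBits = (TyBits - Tmp + 1) + (TyBits - Tmp2 + 1);
  return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
}

int main() {
  // As in test6_ashr_mul: i8 operands sign-extended to i32 have
  // 32 - 8 + 1 = 25 sign bits each.
  unsigned SignBits = numSignBitsOfMul(/*TyBits=*/32, /*Tmp=*/25, /*Tmp2=*/25);
  std::printf("sign bits of the i32 product: %u\n", SignBits); // prints 17
  // 17 sign bits means the whole expression fits in i16:
  // OrigBitWidth - BitWidth = 32 - 16 = 16 < 17, which is exactly the
  // condition the new AShr case of canEvaluateTruncated checks.
  return 0;
}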