Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -559,6 +559,9 @@
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
 
+  // FIXME: Maybe combine the next two transforms to handle the no cast case
+  // more efficiently. Support vector types. Cleanup code by using m_OneUse.
+
   // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
   Value *A = nullptr; ConstantInt *Cst = nullptr;
   if (Src->hasOneUse() &&
@@ -588,15 +591,20 @@
   // the sign bit of the original value; performing ashr instead of lshr
   // generates bits of the same value as the sign bit.
   if (Src->hasOneUse() &&
-      match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
-      cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+      match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst)))) {
+    Value *SExt = cast<Instruction>(Src)->getOperand(0);
+    const unsigned SExtSize = SExt->getType()->getPrimitiveSizeInBits();
     const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    unsigned ShiftAmt = Cst->getZExtValue();
     // This optimization can be only performed when zero bits generated by
     // the original lshr aren't pulled into the value after truncation, so we
-    // can only shift by values smaller than the size of destination type (in
-    // bits).
-    if (Cst->getValue().ult(ASize)) {
-      Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue());
+    // can only shift by values no larger than the number of extension bits.
+    // FIXME: Instead of bailing when the shift is too large, use and to clear
+    // the extra bits.
+    if (SExt->hasOneUse() && ShiftAmt <= SExtSize - ASize) {
+      // If shifting by the size of the original value in bits or more, it is
+      // being filled with the sign bit, so shift by ASize-1 to avoid ub.
+      Value *Shift = Builder->CreateAShr(A, std::min(ShiftAmt, ASize-1));
       Shift->takeName(Src);
       return CastInst::CreateIntegerCast(Shift, CI.getType(), true);
     }
Index: llvm/trunk/test/Transforms/InstCombine/cast.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/cast.ll
+++ llvm/trunk/test/Transforms/InstCombine/cast.ll
@@ -1436,8 +1436,10 @@
 ; Do not optimize to ashr i64 (shift by 48 > 96 - 64)
 define i64 @test91(i64 %A) {
 ; CHECK-LABEL: @test91(
-; CHECK-NEXT:    [[C:%.*]] = ashr i64 %A, 48
-; CHECK-NEXT:    ret i64 [[C]]
+; CHECK-NEXT:    [[B:%.*]] = sext i64 %A to i96
+; CHECK-NEXT:    [[C:%.*]] = lshr i96 [[B]], 48
+; CHECK-NEXT:    [[D:%.*]] = trunc i96 [[C]] to i64
+; CHECK-NEXT:    ret i64 [[D]]
 ;
   %B = sext i64 %A to i96
   %C = lshr i96 %B, 48
@@ -1460,10 +1462,8 @@
 ; When optimizing to ashr i32, don't shift by more than 31.
 define i32 @test93(i32 %A) {
 ; CHECK-LABEL: @test93(
-; CHECK-NEXT:    [[B:%.*]] = sext i32 %A to i96
-; CHECK-NEXT:    [[C:%.*]] = lshr i96 [[B]], 64
-; CHECK-NEXT:    [[D:%.*]] = trunc i96 [[C]] to i32
-; CHECK-NEXT:    ret i32 [[D]]
+; CHECK-NEXT:    [[C:%.*]] = ashr i32 %A, 31
+; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = sext i32 %A to i96
   %C = lshr i96 %B, 64
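
For illustration only, not part of the patch above: a hypothetical IR function (the name @example and the shift amount 40 are made up, not taken from cast.ll) showing the clamping case the new comment describes. With %A of type i32 sign-extended to i96, SExtSize - ASize is 64, so a shift of 40 still stays within the extension bits and the fold applies; because 40 >= ASize, the ashr amount is clamped to ASize-1 = 31.

define i32 @example(i32 %A) {
  %B = sext i32 %A to i96
  %C = lshr i96 %B, 40
  %D = trunc i96 %C to i32      ; after the fold this becomes: ashr i32 %A, 31
  ret i32 %D
}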