Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -5315,28 +5315,61 @@ break; case Instruction::AShr: - // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression. - if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) - if (Operator *L = dyn_cast<Operator>(BO->LHS)) - if (L->getOpcode() == Instruction::Shl && - L->getOperand(1) == BO->RHS) { - uint64_t BitWidth = getTypeSizeInBits(BO->LHS->getType()); - - // If the shift count is not less than the bitwidth, the result of - // the shift is undefined. Don't try to analyze it, because the - // resolution chosen here may differ from the resolution chosen in - // other parts of the compiler. - if (CI->getValue().uge(BitWidth)) - break; + // AShr X, C, where C is a constant. + ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS); + if (!CI) + break; - uint64_t Amt = BitWidth - CI->getZExtValue(); - if (Amt == BitWidth) - return getSCEV(L->getOperand(0)); // shift by zero --> noop + Type *SExtTy = BO->LHS->getType(); + uint64_t BitWidth = getTypeSizeInBits(SExtTy); + // If the shift count is not less than the bitwidth, the result of + // the shift is undefined. Don't try to analyze it, because the + // resolution chosen here may differ from the resolution chosen in + // other parts of the compiler. + if (CI->getValue().uge(BitWidth)) + break; + + if (CI->isNullValue()) + return getSCEV(BO->LHS); // shift by zero --> noop + + uint64_t AShrAmt = CI->getZExtValue(); + uint64_t TruncToWidth = BitWidth - AShrAmt; + Type *TruncTy = IntegerType::get(getContext(), TruncToWidth); + + Operator *L = dyn_cast<Operator>(BO->LHS); + if (L && L->getOpcode() == Instruction::Shl) { + // X = Shl A, C0 + // Y = AShr X, C1 + + const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); + if (L->getOperand(1) == BO->RHS) + // For a two-shift sext-inreg, i.e. n = m, + // use sext(trunc(x)) as the SCEV expression. 
+ return getSignExtendExpr( + getTruncateExpr(ShlOp0SCEV, TruncTy), SExtTy); + + ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1)); + if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) { + uint64_t ShlAmt = ShlAmtCI->getZExtValue(); + if (ShlAmt > AShrAmt) { + // When n > m, use sext(mul(trunc(x), 2^(n-m))) as the SCEV + // expression. We already checked that ShlAmt < BitWidth, so + // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as + // ShlAmt - AShrAmt < TruncToWidth. + APInt Mul = APInt::getOneBitSet(TruncToWidth, ShlAmt - AShrAmt); return getSignExtendExpr( - getTruncateExpr(getSCEV(L->getOperand(0)), - IntegerType::get(getContext(), Amt)), - BO->LHS->getType()); + getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy), + getConstant(Mul)), SExtTy); } + } + } + if (Value *LV = dyn_cast<Value>(BO->LHS)) { + // Transform AShr i32 %x, C to sext(trunc(udiv(x, (1 << C)))). + APInt Div = APInt::getOneBitSet(BitWidth, CI->getZExtValue()); + return getSignExtendExpr( + getTruncateExpr(getUDivExpr(getSCEV(LV), getConstant(Div)), + TruncTy), SExtTy); + } break; } } Index: test/Analysis/ScalarEvolution/flags-from-poison.ll =================================================================== --- test/Analysis/ScalarEvolution/flags-from-poison.ll +++ test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -582,7 +582,7 @@ %i = phi i32 [ %nexti, %loop ], [ %start, %entry ] ; CHECK: %index32 = -; CHECK: --> {((-1 * %halfsub) + %start),+,1} +; CHECK: --> {((-1 * (sext i31 (trunc i32 (%sub /u 2) to i31) to i32)) + %start),+,1} %index32 = sub nsw i32 %i, %halfsub %index64 = sext i32 %index32 to i64 Index: test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll =================================================================== --- test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll +++ test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll @@ -3,6 +3,7 @@ ; Check SCEV expansion uses existing value when unrolling an inner loop with runtime trip count in a loop nest. 
; CHECK-LABEL: @foo( ; CHECK: select +; CHECK: select {{.*}} i32 %shr ; CHECK-NOT: select ; CHECK: ret Index: test/Analysis/ScalarEvolution/sext-div.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/sext-div.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + + +; CHECK: %tmp10 = ashr exact i64 %tmp8, 13 +; CHECK-NEXT: --> ({0,+,1}<%bb7> /u 8192) {{.*}} Exits: ((-1 + (zext i32 %arg2 to i64)) /u 8192) +; CHECK: %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %tmp10 +; CHECK-NEXT: --> ((4 * ({0,+,1}<%bb7> /u 8192)) + %arg) {{.*}} Exits: ((4 * ((-1 + (zext i32 %arg2 to i64)) /u 8192)) + %arg) + +define void @foo(i32* nocapture %arg, i32 %arg1, i32 %arg2) { +bb: + %tmp = icmp sgt i32 %arg2, 0 + br i1 %tmp, label %bb3, label %bb6 + +bb3: ; preds = %bb + %tmp4 = zext i32 %arg2 to i64 + br label %bb7 + +bb5: ; preds = %bb7 + br label %bb6 + +bb6: ; preds = %bb5, %bb + ret void + +bb7: ; preds = %bb7, %bb3 + %tmp8 = phi i64 [ %tmp18, %bb7 ], [ 0, %bb3 ] + %tmp10 = ashr exact i64 %tmp8, 13 + %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %tmp10 + %tmp12 = load i32, i32* %tmp11, align 4 + %tmp13 = sub nsw i32 %tmp12, %arg1 + store i32 %tmp13, i32* %tmp11, align 4 + %tmp18 = add nuw nsw i64 %tmp8, 1 + %tmp19 = icmp eq i64 %tmp18, %tmp4 + br i1 %tmp19, label %bb5, label %bb7 +} Index: test/Analysis/ScalarEvolution/sext-mul.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/sext-mul.ll @@ -0,0 +1,89 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; CHECK: %tmp9 = shl i64 %tmp8, 33 +; CHECK-NEXT: --> {{.*}} Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) LoopDispositions: { %bb7: Computable } +; CHECK: %tmp10 = ashr exact i64 %tmp9, 32 +; CHECK-NEXT: --> {{.*}} Exits: (sext i32 (-2 + (2 * %arg2)) to i64) LoopDispositions: { %bb7: Computable } +; CHECK: %tmp11 = getelementptr 
inbounds i32, i32* %arg, i64 %tmp10 +; CHECK: --> {{.*}} Exits: ((4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) LoopDispositions: { %bb7: Computable } +; CHECK: %tmp14 = or i64 %tmp10, 1 +; CHECK: --> {{.*}} Exits: (1 + (sext i32 (-2 + (2 * %arg2)) to i64)) LoopDispositions: { %bb7: Computable } +; CHECK: %tmp15 = getelementptr inbounds i32, i32* %arg, i64 %tmp14 +; CHECK: --> {{.*}} Exits: (4 + (4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) LoopDispositions: { %bb7: Computable } +; CHECK:Loop %bb7: backedge-taken count is (-1 + (zext i32 %arg2 to i64)) +; CHECK:Loop %bb7: max backedge-taken count is -1 +; CHECK:Loop %bb7: Predicated backedge-taken count is (-1 + (zext i32 %arg2 to i64)) + +define void @foo(i32* nocapture %arg, i32 %arg1, i32 %arg2) { +bb: + %tmp = icmp sgt i32 %arg2, 0 + br i1 %tmp, label %bb3, label %bb6 + +bb3: ; preds = %bb + %tmp4 = zext i32 %arg2 to i64 + br label %bb7 + +bb5: ; preds = %bb7 + br label %bb6 + +bb6: ; preds = %bb5, %bb + ret void + +bb7: ; preds = %bb7, %bb3 + %tmp8 = phi i64 [ %tmp18, %bb7 ], [ 0, %bb3 ] + %tmp9 = shl i64 %tmp8, 33 + %tmp10 = ashr exact i64 %tmp9, 32 + %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %tmp10 + %tmp12 = load i32, i32* %tmp11, align 4 + %tmp13 = sub nsw i32 %tmp12, %arg1 + store i32 %tmp13, i32* %tmp11, align 4 + %tmp14 = or i64 %tmp10, 1 + %tmp15 = getelementptr inbounds i32, i32* %arg, i64 %tmp14 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = mul nsw i32 %tmp16, %arg1 + store i32 %tmp17, i32* %tmp15, align 4 + %tmp18 = add nuw nsw i64 %tmp8, 1 + %tmp19 = icmp eq i64 %tmp18, %tmp4 + br i1 %tmp19, label %bb5, label %bb7 +} + +; CHECK: %t10 = ashr exact i128 %t9, 1 +; CHECK-NEXT: --> {{.*}} Exits: (sext i127 (-633825300114114700748351602688 + (633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128) LoopDispositions: { %bb7: Computable } +; CHECK: %t14 = or i128 %t10, 1 +; CHECK-NEXT: --> {{.*}} Exits: (1 + (sext i127 (-633825300114114700748351602688 + 
(633825300114114700748351602688 * (zext i32 %arg5 to i127))) to i128)) LoopDispositions: { %bb7: Computable } +; CHECK: Loop %bb7: backedge-taken count is (-1 + (zext i32 %arg5 to i128)) +; CHECK-NEXT: Loop %bb7: max backedge-taken count is -1 +; CHECK-NEXT: Loop %bb7: Predicated backedge-taken count is (-1 + (zext i32 %arg5 to i128)) + +define void @goo(i32* nocapture %arg3, i32 %arg4, i32 %arg5) { +bb: + %t = icmp sgt i32 %arg5, 0 + br i1 %t, label %bb3, label %bb6 + +bb3: ; preds = %bb + %t4 = zext i32 %arg5 to i128 + br label %bb7 + +bb5: ; preds = %bb7 + br label %bb6 + +bb6: ; preds = %bb5, %bb + ret void + +bb7: ; preds = %bb7, %bb3 + %t8 = phi i128 [ %t18, %bb7 ], [ 0, %bb3 ] + %t9 = shl i128 %t8, 100 + %t10 = ashr exact i128 %t9, 1 + %t11 = getelementptr inbounds i32, i32* %arg3, i128 %t10 + %t12 = load i32, i32* %t11, align 4 + %t13 = sub nsw i32 %t12, %arg4 + store i32 %t13, i32* %t11, align 4 + %t14 = or i128 %t10, 1 + %t15 = getelementptr inbounds i32, i32* %arg3, i128 %t14 + %t16 = load i32, i32* %t15, align 4 + %t17 = mul nsw i32 %t16, %arg4 + store i32 %t17, i32* %t15, align 4 + %t18 = add nuw nsw i128 %t8, 1 + %t19 = icmp eq i128 %t18, %t4 + br i1 %t19, label %bb5, label %bb7 +} Index: test/Analysis/ScalarEvolution/sext-zero.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/sext-zero.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; CHECK: %tmp9 = shl i64 %tmp8, 33 +; CHECK-NEXT: --> {0,+,8589934592}<%bb7> U: [0,-8589934591) S: [-9223372036854775808,9223372028264841217) Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) LoopDispositions: { %bb7: Computable } +; CHECK-NEXT: %tmp10 = ashr exact i64 %tmp9, 0 +; CHECK-NEXT: --> {0,+,8589934592}<%bb7> U: [0,-8589934591) S: [-9223372036854775808,9223372028264841217) Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) LoopDispositions: { %bb7: Computable } + +define void @foo(i32* 
nocapture %arg, i32 %arg1, i32 %arg2) { +bb: + %tmp = icmp sgt i32 %arg2, 0 + br i1 %tmp, label %bb3, label %bb6 + +bb3: ; preds = %bb + %tmp4 = zext i32 %arg2 to i64 + br label %bb7 + +bb5: ; preds = %bb7 + br label %bb6 + +bb6: ; preds = %bb5, %bb + ret void + +bb7: ; preds = %bb7, %bb3 + %tmp8 = phi i64 [ %tmp18, %bb7 ], [ 0, %bb3 ] + %tmp9 = shl i64 %tmp8, 33 + %tmp10 = ashr exact i64 %tmp9, 0 + %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %tmp10 + %tmp12 = load i32, i32* %tmp11, align 4 + %tmp13 = sub nsw i32 %tmp12, %arg1 + store i32 %tmp13, i32* %tmp11, align 4 + %tmp14 = or i64 %tmp10, 1 + %tmp15 = getelementptr inbounds i32, i32* %arg, i64 %tmp14 + %tmp16 = load i32, i32* %tmp15, align 4 + %tmp17 = mul nsw i32 %tmp16, %arg1 + store i32 %tmp17, i32* %tmp15, align 4 + %tmp18 = add nuw nsw i64 %tmp8, 1 + %tmp19 = icmp eq i64 %tmp18, %tmp4 + br i1 %tmp19, label %bb5, label %bb7 +}