Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -843,6 +843,61 @@
   return nullptr;
 }
 
+// Match a signed truncating saturate written with a select, shifts and an xor,
+// and emit the more canonical trunc(smax(smin(..))) form, which combines more
+// easily with other code. Returns the new value, or nullptr on no match.
+static Value *canonicalizeShiftingSaturate(Instruction &I,
+                                           InstCombiner::BuilderTy &Builder) {
+  //   %sh1 = lshr i16 %input, 8
+  //   %t1 = trunc i16 %sh1 to i8
+  //   %t2 = trunc i16 %input to i8
+  //   %sh2 = ashr i8 %t2, 7
+  //   %c = icmp eq i8 %sh2, %t1
+  //   %sh3 = ashr i16 %input, 15
+  //   %t3 = trunc i16 %sh3 to i8
+  //   %xor = xor i8 %t3, 127
+  //   %ret = select i1 %c, i8 %t2, i8 %xor
+  // => (shown as intrinsics; createMinMax may emit icmp+select pairs instead)
+  //   %min = call i16 @llvm.smin.i16(i16 %input, i16 127)
+  //   %max = call i16 @llvm.smax.i16(i16 %min, i16 -128)
+  //   %ret = trunc i16 %max to i8
+  Type *Ty = I.getType();
+  if (!Ty->isIntOrIntVectorTy() || !isPowerOf2_32(Ty->getScalarSizeInBits()))
+    return nullptr;
+  unsigned BW = Ty->getScalarSizeInBits(); // Scalar width of the narrow result.
+  Value *Input; // Wide value being saturated; bound by the match below.
+  ICmpInst::Predicate Pred; // Only read after a successful match.
+  if (!match(&I,
+             m_Select(
+                 m_ICmp(Pred,
+                        m_AShr(m_Trunc(m_Value(Input)), m_SpecificInt(BW - 1)),
+                        m_Trunc(m_LShr(m_Deferred(Input), m_SpecificInt(BW)))),
+                 m_Trunc(m_Deferred(Input)),
+                 m_Xor(m_Trunc(m_AShr(m_Deferred(Input),
+                                      m_SpecificInt(BW * 2 - 1))),
+                       m_SpecificInt(APInt::getLowBitsSet(BW, BW - 1))))))
+    return nullptr;
+
+  unsigned InputBW = Input->getType()->getScalarSizeInBits();
+  if (Pred != ICmpInst::ICMP_EQ || InputBW > 2 * BW)
+    return nullptr;
+
+  // This produces fewer instructions only if the icmp has no other users and
+  // the true value has fewer than 3 uses or the false value has a single use.
+  if (!I.getOperand(0)->hasOneUse() ||
+      (I.getOperand(1)->hasNUsesOrMore(3) && !I.getOperand(2)->hasOneUse()))
+    return nullptr;
+
+  Value *MinVal = ConstantInt::get(Input->getType(), // smin operand: BW max.
+                                   APInt::getSignedMaxValue(BW).sext(InputBW));
+  Value *Min = createMinMax(Builder, SPF_SMIN, Input, MinVal); // Upper clamp.
+  Value *MaxVal = ConstantInt::get(Input->getType(), // smax operand: BW min.
+                                   APInt::getSignedMinValue(BW).sext(InputBW));
+  Value *Max = createMinMax(Builder, SPF_SMAX, Min, MaxVal); // Lower clamp.
+  Value *Trunc = Builder.CreateTrunc(Max, Ty); // Narrow back to I's type.
+  return Trunc;
+}
+
 /// Fold the following code sequence:
 /// \code
 ///   int a = ctlz(x & -x);
@@ -1548,6 +1603,9 @@
   if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = canonicalizeShiftingSaturate(SI, Builder))
+    return replaceInstUsesWith(SI, V);
+
   return Changed ? &SI : nullptr;
 }
 
Index: llvm/test/Transforms/InstCombine/truncating-saturate.ll
===================================================================
--- llvm/test/Transforms/InstCombine/truncating-saturate.ll
+++ llvm/test/Transforms/InstCombine/truncating-saturate.ll
@@ -7,16 +7,12 @@
 
 define i8 @testi16i8(i16 %add) {
 ; CHECK-LABEL: @testi16i8(
-; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
-; CHECK-NEXT:    ret i8 [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], 127
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 127
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], -128
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 -128
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8
+; CHECK-NEXT:    ret i8 [[TMP5]]
 ;
   %sh = lshr i16 %add, 8
   %conv.i = trunc i16 %sh to i8
@@ -32,16 +28,12 @@
 
 define i32 @testi64i32(i64 %add) {
 ; CHECK-LABEL: @testi64i32(
-; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
-; CHECK-NEXT:    ret i32 [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[ADD:%.*]], 2147483647
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 2147483647
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i64 [[TMP2]], -2147483648
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 -2147483648
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
+; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
   %sh = lshr i64 %add, 32
   %conv.i = trunc i64 %sh to i32
@@ -57,19 +49,8 @@
 
 define i32 @testi64i32addsat(i32 %a, i32 %b) {
 ; CHECK-LABEL: @testi64i32addsat(
-; CHECK-NEXT:    [[SA:%.*]] = sext i32 [[A:%.*]] to i64
-; CHECK-NEXT:    [[SB:%.*]] = sext i32 [[B:%.*]] to i64
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[SA]], [[SB]]
-; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD]], 32
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
-; CHECK-NEXT:    ret i32 [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %sa = sext i32 %a to i64
   %sb = sext i32 %b to i64
@@ -88,16 +69,12 @@
 
 define <4 x i8> @testv4i16i8(<4 x i16> %add) {
 ; CHECK-LABEL: @testv4i16i8(
-; CHECK-NEXT:    [[SH:%.*]] = lshr <4 x i16> [[ADD:%.*]], <i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8>
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8>
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], <i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], <i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8>
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], <i8 127, i8 127, i8 127, i8 127>
-; CHECK-NEXT:    [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]]
-; CHECK-NEXT:    ret <4 x i8> [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i16> [[ADD:%.*]], <i16 127, i16 127, i16 127, i16 127>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[ADD]], <4 x i16> <i16 127, i16 127, i16 127, i16 127>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i16> [[TMP2]], <i16 -128, i16 -128, i16 -128, i16 -128>
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP2]], <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8>
+; CHECK-NEXT:    ret <4 x i8> [[TMP5]]
 ;
   %sh = lshr <4 x i16> %add, <i16 8, i16 8, i16 8, i16 8>
   %conv.i = trunc <4 x i16> %sh to <4 x i8>
@@ -113,19 +90,8 @@
 
 define <4 x i8> @testv4i16i8add(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: @testv4i16i8add(
-; CHECK-NEXT:    [[SA:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i16>
-; CHECK-NEXT:    [[SB:%.*]] = sext <4 x i8> [[B:%.*]] to <4 x i16>
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw <4 x i16> [[SA]], [[SB]]
-; CHECK-NEXT:    [[SH:%.*]] = lshr <4 x i16> [[ADD]], <i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8>
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8>
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], <i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], <i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8>
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], <i8 127, i8 127, i8 127, i8 127>
-; CHECK-NEXT:    [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]]
-; CHECK-NEXT:    ret <4 x i8> [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]])
+; CHECK-NEXT:    ret <4 x i8> [[TMP1]]
 ;
   %sa = sext <4 x i8> %a to <4 x i16>
   %sb = sext <4 x i8> %b to <4 x i16>
@@ -144,16 +110,12 @@
 
 define i8 @testi16i8_revcmp(i16 %add) {
 ; CHECK-LABEL: @testi16i8_revcmp(
-; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]]
-; CHECK-NEXT:    ret i8 [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], 127
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 127
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], -128
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 -128
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8
+; CHECK-NEXT:    ret i8 [[TMP5]]
 ;
   %sh = lshr i16 %add, 8
   %conv.i = trunc i16 %sh to i8
@@ -169,16 +131,12 @@
 
 define i8 @testi16i8_revselect(i16 %add) {
 ; CHECK-LABEL: @testi16i8_revselect(
-; CHECK-NEXT:    [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i16 [[SH]] to i8
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7
-; CHECK-NEXT:    [[CMP_NOT_I_NOT:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I_NOT]], i8 [[CONV1_I]], i8 [[XOR_I]]
-; CHECK-NEXT:    ret i8 [[COND_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], 127
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 127
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], -128
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 -128
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8
+; CHECK-NEXT:    ret i8 [[TMP5]]
 ;
   %sh = lshr i16 %add, 8
   %conv.i = trunc i16 %sh to i8
@@ -323,17 +281,16 @@
 
 define i32 @oneusexor(i64 %add) {
 ; CHECK-LABEL: @oneusexor(
-; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
+; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD:%.*]], 63
 ; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
 ; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[ADD]], 2147483647
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 2147483647
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i64 [[TMP2]], -2147483648
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 -2147483648
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[XOR_I]])
-; CHECK-NEXT:    ret i32 [[COND_I]]
+; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
   %sh = lshr i64 %add, 32
   %conv.i = trunc i64 %sh to i32
@@ -350,17 +307,14 @@
 
 define i32 @oneuseconv(i64 %add) {
 ; CHECK-LABEL: @oneuseconv(
-; CHECK-NEXT:    [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32
-; CHECK-NEXT:    [[CONV_I:%.*]] = trunc i64 [[SH]] to i32
-; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32
-; CHECK-NEXT:    [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31
-; CHECK-NEXT:    [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]]
-; CHECK-NEXT:    [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63
-; CHECK-NEXT:    [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32
-; CHECK-NEXT:    [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647
-; CHECK-NEXT:    [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]]
+; CHECK-NEXT:    [[CONV1_I:%.*]] = trunc i64 [[ADD:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i64 [[ADD]], 2147483647
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 2147483647
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i64 [[TMP2]], -2147483648
+; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 -2147483648
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
 ; CHECK-NEXT:    call void @use(i32 [[CONV1_I]])
-; CHECK-NEXT:    ret i32 [[COND_I]]
+; CHECK-NEXT:    ret i32 [[TMP5]]
 ;
   %sh = lshr i64 %add, 32
   %conv.i = trunc i64 %sh to i32