Index: llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -843,6 +843,58 @@ return nullptr; } +// Match a clamping saturate pattern using xor and shifts, producing instead a +// more canonical max(min(..)) pattern that can more easily be combined with +// other code. Returns the replacement, or nullptr if there is no match. +static Value *canonicalizeShiftingSaturate(Instruction &I, + InstCombiner::BuilderTy &Builder) { + // %sh1 = lshr i16 %add, 8 + // %t1 = trunc i16 %sh1 to i8 + // %t2 = trunc i16 %add to i8 + // %sh2 = ashr i8 %t2, 7 + // %c = icmp eq i8 %sh2, %t1 + // %sh3 = ashr i16 %add, 15 + // %t3 = trunc i16 %sh3 to i8 + // %x = xor i8 %t3, 127 + // %ret = select i1 %c, i8 %t2, i8 %x + // => + // %min = call i16 @llvm.smin.i16(i16 %add, i16 127) + // %max = call i16 @llvm.smax.i16(i16 %min, i16 -128) + // %ret = trunc i16 %max to i8 + Type *Ty = I.getType(); + if (!Ty->isIntOrIntVectorTy() || !isPowerOf2_32(Ty->getScalarSizeInBits())) + return nullptr; + unsigned BW = Ty->getScalarSizeInBits(); + Value *Add; + ICmpInst::Predicate Pred; + if (!match(&I, m_Select( + m_ICmp(Pred, m_AShr(m_Trunc(m_Value(Add)), m_SpecificInt(BW - 1)), + m_Trunc(m_LShr(m_Deferred(Add), m_SpecificInt(BW)))), + m_Trunc(m_Deferred(Add)), + m_Xor(m_Trunc(m_AShr(m_Deferred(Add), m_SpecificInt(BW * 2 - 1))), + m_SpecificInt(APInt::getLowBitsSet(BW, BW - 1)))))) + return nullptr; + + unsigned AddBW = Add->getType()->getScalarSizeInBits(); + if (Pred != ICmpInst::ICMP_EQ || AddBW > 2 * BW) + return nullptr; + + // This produces fewer instructions if the icmp is not used elsewhere, or at + // least one of the select operands doesn't have extra uses. 
+ if (!I.getOperand(0)->hasOneUse() || + (I.getOperand(1)->hasNUsesOrMore(3) && !I.getOperand(2)->hasOneUse())) + return nullptr; + + Value *MinVal = ConstantInt::get(Add->getType(), + APInt::getSignedMaxValue(BW).sext(AddBW)); + Value *Min = createMinMax(Builder, SPF_SMIN, Add, MinVal); + Value *MaxVal = ConstantInt::get(Add->getType(), + APInt::getSignedMinValue(BW).sext(AddBW)); + Value *Max = createMinMax(Builder, SPF_SMAX, Min, MaxVal); + Value *Trunc = Builder.CreateTrunc(Max, Ty); + return Trunc; +} + /// Fold the following code sequence: /// \code /// int a = ctlz(x & -x); @@ -1548,6 +1600,9 @@ if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); + if (Value *V = canonicalizeShiftingSaturate(SI, Builder)) + return replaceInstUsesWith(SI, V); + return Changed ? &SI : nullptr; } Index: llvm/test/Transforms/InstCombine/truncating-saturate.ll =================================================================== --- llvm/test/Transforms/InstCombine/truncating-saturate.ll +++ llvm/test/Transforms/InstCombine/truncating-saturate.ll @@ -7,16 +7,12 @@ define i8 @testi16i8(i16 %add) { ; CHECK-LABEL: @testi16i8( -; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 -; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] -; CHECK-NEXT: ret i8 [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 127 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], -128 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 
[[TMP2]], i16 -128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8 +; CHECK-NEXT: ret i8 [[TMP5]] ; %sh = lshr i16 %add, 8 %conv.i = trunc i16 %sh to i8 @@ -32,16 +28,12 @@ define i32 @testi64i32(i64 %add) { ; CHECK-LABEL: @testi64i32( -; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] -; CHECK-NEXT: ret i32 [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[ADD:%.*]], 2147483647 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 2147483647 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i64 [[TMP2]], -2147483648 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 -2147483648 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NEXT: ret i32 [[TMP5]] ; %sh = lshr i64 %add, 32 %conv.i = trunc i64 %sh to i32 @@ -57,19 +49,8 @@ define i32 @testi64i32addsat(i32 %a, i32 %b) { ; CHECK-LABEL: @testi64i32addsat( -; CHECK-NEXT: [[SA:%.*]] = sext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[SB:%.*]] = sext i32 [[B:%.*]] to i64 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[SA]], [[SB]] -; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD]], 32 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 
2147483647 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] -; CHECK-NEXT: ret i32 [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[A:%.*]], i32 [[B:%.*]]) +; CHECK-NEXT: ret i32 [[TMP1]] ; %sa = sext i32 %a to i64 %sb = sext i32 %b to i64 @@ -88,16 +69,12 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) { ; CHECK-LABEL: @testv4i16i8( -; CHECK-NEXT: [[SH:%.*]] = lshr <4 x i16> [[ADD:%.*]], -; CHECK-NEXT: [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8> -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc <4 x i16> [[ADD]] to <4 x i8> -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8> -; CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], -; CHECK-NEXT: [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]] -; CHECK-NEXT: ret <4 x i8> [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i16> [[ADD:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[ADD]], <4 x i16> +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP2]], <4 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8> +; CHECK-NEXT: ret <4 x i8> [[TMP5]] ; %sh = lshr <4 x i16> %add, %conv.i = trunc <4 x i16> %sh to <4 x i8> @@ -113,19 +90,8 @@ define <4 x i8> @testv4i16i8add(<4 x i8> %a, <4 x i8> %b) { ; CHECK-LABEL: @testv4i16i8add( -; CHECK-NEXT: [[SA:%.*]] = sext <4 x i8> [[A:%.*]] to <4 x i16> -; CHECK-NEXT: [[SB:%.*]] = sext <4 x i8> [[B:%.*]] to <4 x i16> -; CHECK-NEXT: [[ADD:%.*]] = add nsw <4 x i16> [[SA]], [[SB]] -; CHECK-NEXT: [[SH:%.*]] = lshr <4 x i16> [[ADD]], -; CHECK-NEXT: [[CONV_I:%.*]] = trunc <4 x i16> [[SH]] to <4 x i8> -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc <4 x i16> 
[[ADD]] to <4 x i8> -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr <4 x i8> [[CONV1_I]], -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq <4 x i8> [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr <4 x i16> [[ADD]], -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc <4 x i16> [[SHR4_I]] to <4 x i8> -; CHECK-NEXT: [[XOR_I:%.*]] = xor <4 x i8> [[CONV5_I]], -; CHECK-NEXT: [[COND_I:%.*]] = select <4 x i1> [[CMP_NOT_I]], <4 x i8> [[CONV1_I]], <4 x i8> [[XOR_I]] -; CHECK-NEXT: ret <4 x i8> [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) +; CHECK-NEXT: ret <4 x i8> [[TMP1]] ; %sa = sext <4 x i8> %a to <4 x i16> %sb = sext <4 x i8> %b to <4 x i16> @@ -144,16 +110,12 @@ define i8 @testi16i8_revcmp(i16 %add) { ; CHECK-LABEL: @testi16i8_revcmp( -; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 -; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i8 [[CONV1_I]], i8 [[XOR_I]] -; CHECK-NEXT: ret i8 [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 127 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], -128 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 -128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8 +; CHECK-NEXT: ret i8 [[TMP5]] ; %sh = lshr i16 %add, 8 %conv.i = trunc i16 %sh to i8 @@ -169,16 +131,12 @@ define i8 @testi16i8_revselect(i16 %add) { ; CHECK-LABEL: @testi16i8_revselect( -; CHECK-NEXT: [[SH:%.*]] = lshr i16 [[ADD:%.*]], 8 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i16 [[SH]] to i8 -; CHECK-NEXT: 
[[CONV1_I:%.*]] = trunc i16 [[ADD]] to i8 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i8 [[CONV1_I]], 7 -; CHECK-NEXT: [[CMP_NOT_I_NOT:%.*]] = icmp eq i8 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i16 [[ADD]], 15 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i16 [[SHR4_I]] to i8 -; CHECK-NEXT: [[XOR_I:%.*]] = xor i8 [[CONV5_I]], 127 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I_NOT]], i8 [[CONV1_I]], i8 [[XOR_I]] -; CHECK-NEXT: ret i8 [[COND_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i16 [[ADD:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 127 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], -128 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 -128 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8 +; CHECK-NEXT: ret i8 [[TMP5]] ; %sh = lshr i16 %add, 8 %conv.i = trunc i16 %sh to i8 @@ -323,17 +281,16 @@ define i32 @oneusexor(i64 %add) { ; CHECK-LABEL: @oneusexor( -; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 +; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD:%.*]], 63 ; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 ; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[ADD]], 2147483647 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 2147483647 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i64 [[TMP2]], -2147483648 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 -2147483648 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 ; CHECK-NEXT: call void @use(i32 [[XOR_I]]) -; CHECK-NEXT: ret i32 [[COND_I]] +; CHECK-NEXT: ret 
i32 [[TMP5]] ; %sh = lshr i64 %add, 32 %conv.i = trunc i64 %sh to i32 @@ -350,17 +307,14 @@ define i32 @oneuseconv(i64 %add) { ; CHECK-LABEL: @oneuseconv( -; CHECK-NEXT: [[SH:%.*]] = lshr i64 [[ADD:%.*]], 32 -; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[SH]] to i32 -; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD]] to i32 -; CHECK-NEXT: [[SHR2_I:%.*]] = ashr i32 [[CONV1_I]], 31 -; CHECK-NEXT: [[CMP_NOT_I:%.*]] = icmp eq i32 [[SHR2_I]], [[CONV_I]] -; CHECK-NEXT: [[SHR4_I:%.*]] = ashr i64 [[ADD]], 63 -; CHECK-NEXT: [[CONV5_I:%.*]] = trunc i64 [[SHR4_I]] to i32 -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[CONV5_I]], 2147483647 -; CHECK-NEXT: [[COND_I:%.*]] = select i1 [[CMP_NOT_I]], i32 [[CONV1_I]], i32 [[XOR_I]] +; CHECK-NEXT: [[CONV1_I:%.*]] = trunc i64 [[ADD:%.*]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[ADD]], 2147483647 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 2147483647 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i64 [[TMP2]], -2147483648 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 -2147483648 +; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 ; CHECK-NEXT: call void @use(i32 [[CONV1_I]]) -; CHECK-NEXT: ret i32 [[COND_I]] +; CHECK-NEXT: ret i32 [[TMP5]] ; %sh = lshr i64 %add, 32 %conv.i = trunc i64 %sh to i32