Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -538,6 +538,64 @@ return Builder.CreateOr(V, Y); } +/// Transform patterns such as: (a > b) ? a - b : 0 +/// into: ((a > b) ? a : b) - b) +/// This produces a canonical max pattern that is more easily recognized by the +/// backend and converted into saturated subtraction instructions if those +/// exist. +/// There are 8 commuted/swapped variants of this pattern. +/// TODO: Also support a - UMIN(a,b) patterns. +static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, + const Value *TrueVal, + const Value *FalseVal, + InstCombiner::BuilderTy &Builder) { + ICmpInst::Predicate Pred = ICI->getPredicate(); + if (!ICmpInst::isUnsigned(Pred)) + return nullptr; + + // (b > a) ? 0 : a - b -> (b <= a) ? a - b : 0 + if (match(TrueVal, m_Zero())) { + Pred = ICmpInst::getInversePredicate(Pred); + std::swap(TrueVal, FalseVal); + } + + if (!match(FalseVal, m_Zero())) + return nullptr; + + Value *A = ICI->getOperand(0); + Value *B = ICI->getOperand(1); + + if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_ULT) { + // (b < a) ? a - b : 0 -> (a > b) ? a - b : 0 + std::swap(A, B); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) && + "Unexpected isUnsigned predicate!"); + + bool IsNegative = false; + + // Notice negative form of unsigned saturated subtraction ((a > b) ? b - a : + // 0) + if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A)))) { + IsNegative = true; + } else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B)))) + return nullptr; + + // If sub is used anywhere else, transformation will duplicate the instruction + if (!TrueVal->hasOneUse()) + return nullptr; + + // All check passed, convert to cannonical unsigned saturated + // subtraction form sub(max()) + // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b) + Value *Sel = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B); + Value *Sub = + IsNegative ? Builder.CreateSub(B, Sel) : Builder.CreateSub(Sel, B); + return Sub; +} + /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. /// @@ -860,10 +918,12 @@ if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); + if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder)) + return replaceInstUsesWith(SI, V); + return Changed ? &SI : nullptr; } - /// SI is a select whose condition is a PHI node (but the two may be in /// different blocks). See if the true/false values (V) are live in all of the /// predecessor blocks of the PHI. For example, cases like this can't be mapped: Index: test/Transforms/InstCombine/unsigned_saturated_sub.ll =================================================================== --- test/Transforms/InstCombine/unsigned_saturated_sub.ll +++ test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -12,10 +12,10 @@ define i64 @max_sub_ugt(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ugt( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0 -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ugt i64 %a, %b %sub = sub i64 %a, %b @@ -23,15 +23,30 @@ ret i64 %sel } +; (a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b) + +define i64 @max_sub_uge(i64 %a, i64 %b) { +; CHECK-LABEL: @max_sub_uge( +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] +; CHECK-NEXT: ret i64 [[TMP3]] +; + %cmp = icmp uge i64 %a, %b + %sub = sub i64 %a, %b + %sel = select i1 %cmp, i64 %sub ,i64 0 + ret i64 %sel +} + ; Again, with vectors: ; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b) define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @max_sub_ugt_vec( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub <4 x i32> [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SUB]], <4 x i32> zeroinitializer -; CHECK-NEXT: ret <4 x i32> [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]] +; CHECK-NEXT: ret <4 x i32> [[TMP3]] ; %cmp = icmp ugt <4 x i32> %a, %b %sub = sub <4 x i32> %a, %b @@ -44,12 +59,12 @@ define i64 @max_sub_ult(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ult( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ult i64 %b, %a %sub = sub i64 %a, %b @@ -63,12 +78,12 @@ define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ugt_sel_swapped( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ugt i64 %b, %a %sub = sub i64 %a, %b @@ -82,10 +97,10 @@ define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @max_sub_ult_sel_swapped( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[A]], [[B]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ult i64 %a, %b %sub = sub i64 %a, %b @@ -97,12 +112,12 @@ define i64 @neg_max_sub_ugt(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ugt( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ugt i64 %a, %b %sub = sub i64 %b, %a @@ -116,10 +131,10 @@ define i64 @neg_max_sub_ult(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ult( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0 -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ult i64 %b, %a %sub = sub i64 %b, %a @@ -131,10 +146,10 @@ define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped( -; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ugt i64 %b, %a %sub = sub i64 %b, %a @@ -146,12 +161,12 @@ define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) { ; CHECK-LABEL: @neg_max_sub_ult_sel_swapped( -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[SUB:%.*]] = sub i64 [[B]], [[A]] -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]] ; CHECK-NEXT: [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]] ; CHECK-NEXT: call void @use(i64 [[EXTRASUB]]) -; CHECK-NEXT: ret i64 [[SEL]] +; CHECK-NEXT: ret i64 [[TMP3]] ; %cmp = icmp ult i64 %a, %b %sub = sub i64 %b, %a