Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -538,6 +538,61 @@ return Builder.CreateOr(V, Y); } +/// We want to turn any of the possible patterns to ((a > b) ? a : b) - b) which +/// is the cannonical for unsigned saturation subtraction pattern. +/// TODO: Also support a - UMIN(a,b) patterns. +static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI, + const Value *TrueVal, + const Value *FalseVal, + InstCombiner::BuilderTy &Builder) { + ICmpInst::Predicate Pred = ICI->getPredicate(); + if (!ICmpInst::isUnsigned(Pred)) + return nullptr; + + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + + // (b > a) ? 0 : a - b -> (b <= a) ? a - b : 0 + if (match(TrueVal, m_Zero())) { + Pred = ICmpInst::getInversePredicate(Pred); + std::swap(TrueVal, FalseVal); + } + + if (!match(FalseVal, m_Zero())) + return nullptr; + + // If sub is used anywhere else, transformation will duplicate the instruction + if (!TrueVal->hasOneUse()) + return nullptr; + + if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_ULT) { + // (b < a) ? a - b : 0 -> (a > b) ? a - b : 0 + std::swap(CmpLHS, CmpRHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) && + "Unexpected isUnsigned predicate!"); + + bool IsNegative = false; + + // Notice negative form of unsigned saturated subtraction ((a > b) ? b - a : + // 0) + if (match(TrueVal, m_Sub(m_Specific(CmpRHS), m_Specific(CmpLHS)))) { + IsNegative = true; + } else if (!match(TrueVal, m_Sub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) + return nullptr; + + // All check passed, convert to cannonical unsigned saturated + // subtraction form sub(max()) + // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b) + Value *Sel = Builder.CreateSelect(Builder.CreateICmp(Pred, CmpLHS, CmpRHS), + CmpLHS, CmpRHS); + Value *Sub = IsNegative ? Builder.CreateSub(CmpRHS, Sel) + : Builder.CreateSub(Sel, CmpRHS); + return Sub; +} + /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single /// call to cttz/ctlz with flag 'is_zero_undef' cleared. /// @@ -860,10 +915,12 @@ if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder)) return replaceInstUsesWith(SI, V); + if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder)) + return replaceInstUsesWith(SI, V); + return Changed ? &SI : nullptr; } - /// SI is a select whose condition is a PHI node (but the two may be in /// different blocks). See if the true/false values (V) are live in all of the /// predecessor blocks of the PHI. For example, cases like this can't be mapped: Index: test/Transforms/InstCombine/unsigned_saturated_sub.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/unsigned_saturated_sub.ll @@ -0,0 +1,148 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Canonicalization of unsigned saturated subtraction idiom is tested here. +; The backend recognition of pattern test is in test/CodeGen/X86/psubus.ll. +; RUN: opt -instcombine -S < %s | FileCheck %s + +;(a > b) ? a - b : 0 -> ((a > b) ? a : b) - b) +define i64 @max_sub_ugt(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @max_sub_ugt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[LHS]], i64 [[RHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[RHS]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ugt i64 %lhs, %rhs + %sub = sub i64 %lhs, %rhs + %sel = select i1 %cmp, i64 %sub ,i64 0 + ret i64 %sel +} + +;(a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b) +define <4 x i32> @max_sub_uge_128(<4 x i32> %lhs, <4 x i32> %rhs) { +; CHECK-LABEL: @max_sub_uge_128( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[RHS]], <4 x i32> [[LHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[TMP1]], [[RHS]] +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; +entry: + %cmp = icmp uge <4 x i32> %lhs, %rhs + %sub = sub <4 x i32> %lhs, %rhs + %sel = select <4 x i1> %cmp, <4 x i32> %sub, <4 x i32> zeroinitializer + ret <4 x i32> %sel +} + +;(b < a) ? a - b : 0 -> ((a > b) ? a : b) - b) +define i64 @max_sub_ult(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @max_sub_ult( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[LHS]], i64 [[RHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[RHS]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ult i64 %rhs, %lhs + %sub = sub i64 %lhs, %rhs + %sel = select i1 %cmp, i64 %sub ,i64 0 + ret i64 %sel +} + +;(b > a) ? 0 : a - b -> ((a > b) ? a : b) - b) +define i64 @max_sub_ugt_sel_swapped(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @max_sub_ugt_sel_swapped( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[RHS]], i64 [[LHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[RHS]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ugt i64 %rhs, %lhs + %sub = sub i64 %lhs, %rhs + %sel = select i1 %cmp, i64 0 ,i64 %sub + ret i64 %sel +} + +;(a < b) ? 0 : a - b -> ((a > b) ? a : b) - b) +define i64 @max_sub_ult_sel_swapped(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @max_sub_ult_sel_swapped( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[RHS]], i64 [[LHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[RHS]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ult i64 %lhs, %rhs + %sub = sub i64 %lhs, %rhs + %sel = select i1 %cmp, i64 0 ,i64 %sub + ret i64 %sel +} + +;((a > b) ? b - a : 0) -> (b - ((a > b) ? a : b)) +define i64 @neg_max_sub_ugt(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @neg_max_sub_ugt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[LHS]], i64 [[RHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[RHS]], [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ugt i64 %lhs, %rhs + %sub = sub i64 %rhs, %lhs + %sel = select i1 %cmp, i64 %sub ,i64 0 + ret i64 %sel +} + +;((b < a) ? b - a : 0) -> - ((a > b) ? a : b) - b) +define i64 @neg_max_sub_ult(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @neg_max_sub_ult( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ugt i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[LHS]], i64 [[RHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[RHS]], [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ult i64 %rhs, %lhs + %sub = sub i64 %rhs, %lhs + %sel = select i1 %cmp, i64 %sub ,i64 0 + ret i64 %sel +} + +;((b > a) ? 0 : b - a) -> - ((a > b) ? a : b) - b) +define i64 @neg_max_sub_ugt_sel_swapped(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[RHS]], i64 [[LHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[RHS]], [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ugt i64 %rhs, %lhs + %sub = sub i64 %rhs, %lhs + %sel = select i1 %cmp, i64 0 ,i64 %sub + ret i64 %sel +} + +;((a < b) ? 0 : b - a) -> - ((a > b) ? a : b) - b) +define i64 @neg_max_sub_ult_sel_swapped(i64 %lhs, i64 %rhs) { +; CHECK-LABEL: @neg_max_sub_ult_sel_swapped( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i64 [[LHS:%.*]], [[RHS:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[RHS]], i64 [[LHS]] +; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[RHS]], [[TMP1]] +; CHECK-NEXT: ret i64 [[TMP2]] +; +entry: + %cmp = icmp ult i64 %lhs, %rhs + %sub = sub i64 %rhs, %lhs + %sel = select i1 %cmp, i64 0 ,i64 %sub + ret i64 %sel +}