Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1662,6 +1662,39 @@ } } + { + // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A + // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A + // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O) + // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O) + // So long as O here is freely invertible, this will be neutral or a win. + Value *LHS, *RHS, *A; + Value *NotA = Op0, *MinMax = Op1; + SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor; + if (!SelectPatternResult::isMinOrMax(SPF)) { + NotA = Op1; + MinMax = Op0; + SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor; + } + if (SelectPatternResult::isMinOrMax(SPF) && + match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) { + if (NotA == LHS) + std::swap(LHS, RHS); + // LHS is now O above and expected to have at least 2 uses (the min/max) + // NotA is epected to have 2 uses from the min/max and 1 from the sub. + if (IsFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) && + !NotA->hasNUsesOrMore(4)) { + // Note: We don't generate the inverse max/min, just create the not of + // it and let other folds do the rest. + Value *Not = Builder.CreateNot(MinMax); + if (NotA == Op0) + return BinaryOperator::CreateSub(Not, A); + else + return BinaryOperator::CreateSub(A, Not); + } + } + } + // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". Value *LHSOp, *RHSOp; Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -20,7 +20,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetFolder.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" @@ -33,6 +32,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Use.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" @@ -41,11 +41,14 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/InstCombine/InstCombineWorklist.h" +#include "llvm/Transforms/Utils/Local.h" #include #include #define DEBUG_TYPE "instcombine" +using namespace llvm::PatternMatch; + namespace llvm { class APInt; @@ -175,6 +178,10 @@ if (isa(BO->getOperand(0)) || isa(BO->getOperand(1))) return WillInvertAllUses; + // Selects with invertible operands are freely invertible + if (match(V, m_Select(m_Value(), m_Not(m_Value()), m_Not(m_Value())))) + return WillInvertAllUses; + return false; } Index: test/Transforms/InstCombine/sub-minmax.ll =================================================================== --- test/Transforms/InstCombine/sub-minmax.ll +++ test/Transforms/InstCombine/sub-minmax.ll @@ -125,10 +125,10 @@ define i32 @max_na_bi_minux_na_use(i32 %A, i32 %Bi) { ; CHECK-LABEL: @max_na_bi_minux_na_use( -; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[L1]], [[NOT]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32 +; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]] ; CHECK-NEXT: call void @use32(i32 [[L1]]) ; CHECK-NEXT: ret i32 [[X]] ; @@ -142,10 +142,10 @@ define i32 @na_minus_max_na_bi_use(i32 %A, i32 %Bi) { ; CHECK-LABEL: @na_minus_max_na_bi_use( -; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1 -; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31 -; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[L1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32 +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32 +; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]] ; CHECK-NEXT: call void @use32(i32 [[L1]]) ; CHECK-NEXT: ret i32 [[X]] ; @@ -276,12 +276,11 @@ define i8 @umin_not_sub(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_not_sub( -; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1 -; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1 -; CHECK-NEXT: [[CMPXY:%.*]] = icmp ult i8 [[NX]], [[NY]] -; CHECK-NEXT: [[MINXY:%.*]] = select i1 [[CMPXY]], i8 [[NX]], i8 [[NY]] -; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[NX]], [[MINXY]] -; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[NY]], [[MINXY]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]] +; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP2]], [[X]] +; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[TMP2]], [[Y]] ; CHECK-NEXT: call void @use8(i8 [[SUBX]]) ; CHECK-NEXT: call void @use8(i8 [[SUBY]]) ; CHECK-NEXT: ret i8 [[MINXY]] @@ -299,12 +298,11 @@ define i8 @umin_not_sub_rev(i8 %x, i8 %y) { ; CHECK-LABEL: @umin_not_sub_rev( -; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1 -; CHECK-NEXT: [[NY:%.*]] = xor i8 [[Y:%.*]], -1 -; CHECK-NEXT: [[CMPXY:%.*]] = icmp ult i8 [[NX]], [[NY]] -; CHECK-NEXT: [[MINXY:%.*]] = select i1 [[CMPXY]], i8 [[NX]], i8 [[NY]] -; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[MINXY]], [[NX]] -; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[MINXY]], [[NY]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]] +; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1 +; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP2]] +; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[Y]], [[TMP2]] ; CHECK-NEXT: call void @use8(i8 [[SUBX]]) ; CHECK-NEXT: call void @use8(i8 [[SUBY]]) ; CHECK-NEXT: ret i8 [[MINXY]] @@ -322,17 +320,15 @@ define void @umin3_not_all_ops_extra_uses_invert_subs(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @umin3_not_all_ops_extra_uses_invert_subs( -; CHECK-NEXT: [[XN:%.*]] = xor i8 [[X:%.*]], -1 -; CHECK-NEXT: [[YN:%.*]] = xor i8 [[Y:%.*]], -1 -; CHECK-NEXT: [[ZN:%.*]] = xor i8 [[Z:%.*]], -1 -; CHECK-NEXT: [[CMPXZ:%.*]] = icmp ult i8 [[XN]], [[ZN]] -; CHECK-NEXT: [[MINXZ:%.*]] = select i1 [[CMPXZ]], i8 [[XN]], i8 [[ZN]] -; CHECK-NEXT: [[CMPXYZ:%.*]] = icmp ult i8 [[MINXZ]], [[YN]] -; CHECK-NEXT: [[MINXYZ:%.*]] = select i1 [[CMPXYZ]], i8 [[MINXZ]], i8 [[YN]] -; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[XN]], [[MINXYZ]] -; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[YN]], [[MINXYZ]] -; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[ZN]], [[MINXYZ]] -; CHECK-NEXT: call void @use8(i8 [[MINXYZ]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]] +; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], -1 +; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[TMP4]], [[X]] +; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[TMP4]], [[Y]] +; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[TMP4]], [[Z]] +; CHECK-NEXT: call void @use8(i8 [[TMP5]]) ; CHECK-NEXT: call void @use8(i8 [[XMIN]]) ; CHECK-NEXT: call void @use8(i8 [[YMIN]]) ; CHECK-NEXT: call void @use8(i8 [[ZMIN]])