Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1191,6 +1191,25 @@ } } + // A + ~Select(P, A, B) -> Select(P, -1, A+~B) if B is freely invertible + // A + ~Select(P, B, A) -> Select(P, A+~B, -1) + Value *Select, *Pred, *Not; + if (match(&I, m_c_BinOp(m_CombineAnd(m_Not(m_Value(Select)), m_Value(Not)), + m_Value(A))) && + (match(Select, m_Select(m_Value(Pred), m_Specific(A), m_Value(B))) || + match(Select, m_Select(m_Value(Pred), m_Value(B), m_Specific(A)))) && + IsFreeToInvert(B, B->hasOneUse()) && Select->hasOneUse() && + Not->hasOneUse()) { + Value *Not = Builder.CreateNot(B); + Value *AmC = Builder.CreateAdd(A, Not); + if (cast<SelectInst>(Select)->getTrueValue() == A) + return SelectInst::Create( + Pred, ConstantInt::getAllOnesValue(A->getType()), AmC); + else + return SelectInst::Create(Pred, AmC, + ConstantInt::getAllOnesValue(A->getType())); + } + if (Instruction *Ext = narrowMathIfNoOverflow(I)) return Ext; Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -314,11 +314,23 @@ Known.One = std::move(IKnownOne); break; } - case Instruction::Select: - // If this is a select as part of a min/max pattern, don't simplify any - // further in case we break the structure. 
+ case Instruction::Select: { Value *LHS, *RHS; - if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) + SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; + if (SPF == SPF_UMAX) { + // If the demanded bits are an upper bit mask, UMax(A, C) == A if C < + // -DemandedMask, + // i.e. if we only care about the top bits, then if A < C we will get the + // same result anyway. + const APInt *C; + if ((-DemandedMask).isPowerOf2() && match(RHS, m_APInt(C)) && + (C->ult(-DemandedMask))) + return LHS; + } + + // If this is a select as part of any other min/max pattern, don't simplify + // any further in case we break the structure. + if (SPF != SPF_UNKNOWN) return nullptr; if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || @@ -336,6 +348,7 @@ Known.One = RHSKnown.One & LHSKnown.One; Known.Zero = RHSKnown.Zero & LHSKnown.Zero; break; + } case Instruction::ZExt: case Instruction::Trunc: { unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); Index: test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll =================================================================== --- test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll +++ test/Transforms/IndVarSimplify/replace-loop-exit-folds.ll @@ -7,14 +7,8 @@ define i32 @remove_loop(i32 %size) { ; CHECK-LABEL: @remove_loop( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[SIZE:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -32 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], [[SIZE]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -32 -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[SIZE:%.*]], 31 +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: br label %while.cond @@ -33,22 +27,16 @@ define i32 @used_loop(i32 %size) minsize { ; CHECK-LABEL: @used_loop( ; 
CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[SIZE:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -32 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], [[SIZE]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -32 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] +; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_COND]] ] ; CHECK-NEXT: tail call void @call() ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31 ; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_COND]], label [[WHILE_END:%.*]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[SIZE]], 31 +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: br label %while.cond @@ -68,15 +56,9 @@ define i32 @used_loop2(i32 %size) { ; CHECK-LABEL: @used_loop2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[SIZE:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP0]], -32 -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -32 -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[UMAX]], [[SIZE]] -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 32 -; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], -32 ; CHECK-NEXT: br label [[WHILE_COND:%.*]] ; CHECK: while.cond: -; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: [[SIZE_ADDR_0:%.*]] = phi i32 [ [[SIZE:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[WHILE_BODY:%.*]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[SIZE_ADDR_0]], 31 ; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_END:%.*]] ; CHECK: while.body: @@ -84,8 +66,8 @@ 
; CHECK-NEXT: [[SUB]] = add i32 [[SIZE_ADDR_0]], -32 ; CHECK-NEXT: br label [[WHILE_COND]] ; CHECK: while.end: -; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[SIZE]], [[TMP4]] -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[SIZE]], 31 +; CHECK-NEXT: ret i32 [[TMP0]] ; entry: br label %while.cond Index: test/Transforms/InstCombine/add-select.ll =================================================================== --- test/Transforms/InstCombine/add-select.ll +++ test/Transforms/InstCombine/add-select.ll @@ -34,9 +34,8 @@ define i32 @A_plus_not_smin_invertible(i32 %A) { ; CHECK-LABEL: @A_plus_not_smin_invertible( ; CHECK-NEXT: [[L0:%.*]] = icmp slt i32 [[A:%.*]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[A]], i32 31 -; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[L1]], -1 -; CHECK-NEXT: [[X:%.*]] = add i32 [[NOT]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A]], -32 +; CHECK-NEXT: [[X:%.*]] = select i1 [[L0]], i32 -1, i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[X]] ; %l0 = icmp slt i32 %A, 31 @@ -49,9 +48,8 @@ define i32 @A_plus_not_smax_invertible(i32 %A) { ; CHECK-LABEL: @A_plus_not_smax_invertible( ; CHECK-NEXT: [[L0:%.*]] = icmp sgt i32 [[A:%.*]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[A]], i32 31 -; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[L1]], -1 -; CHECK-NEXT: [[X:%.*]] = add i32 [[NOT]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[A]], -32 +; CHECK-NEXT: [[X:%.*]] = select i1 [[L0]], i32 -1, i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[X]] ; %l0 = icmp sgt i32 %A, 31 @@ -63,10 +61,8 @@ define i32 @usebase(i1 %p, i32 %A, i32 %Bi) { ; CHECK-LABEL: @usebase( -; CHECK-NEXT: [[B:%.*]] = xor i32 [[BI:%.*]], -1 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[P:%.*]], i32 [[A:%.*]], i32 [[B]] -; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[L1]], -1 -; CHECK-NEXT: [[X:%.*]] = add i32 [[NOT]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[BI:%.*]], [[A:%.*]] +; CHECK-NEXT: [[X:%.*]] = select i1 [[P:%.*]], i32 -1, i32 [[TMP1]] ; CHECK-NEXT: ret i32 [[X]] ; %B = xor 
i32 %Bi, -1 @@ -79,9 +75,8 @@ define i32 @useB(i1 %p, i32 %A, i32 %Bi) { ; CHECK-LABEL: @useB( ; CHECK-NEXT: [[B:%.*]] = xor i32 [[BI:%.*]], -1 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[P:%.*]], i32 [[A:%.*]], i32 [[B]] -; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[L1]], -1 -; CHECK-NEXT: [[X:%.*]] = add i32 [[NOT]], [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[BI]], [[A:%.*]] +; CHECK-NEXT: [[X:%.*]] = select i1 [[P:%.*]], i32 -1, i32 [[TMP1]] ; CHECK-NEXT: call void @use(i32 [[B]]) ; CHECK-NEXT: ret i32 [[X]] ; Index: test/Transforms/InstCombine/minmax-demandbits.ll =================================================================== --- test/Transforms/InstCombine/minmax-demandbits.ll +++ test/Transforms/InstCombine/minmax-demandbits.ll @@ -4,9 +4,7 @@ define i32 @and_umax_less(i32 %A) { ; CHECK-LABEL: @and_umax_less( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 31 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 31 -; CHECK-NEXT: [[X:%.*]] = and i32 [[L1]], -32 +; CHECK-NEXT: [[X:%.*]] = and i32 [[A:%.*]], -32 ; CHECK-NEXT: ret i32 [[X]] ; %l0 = icmp ugt i32 31, %A @@ -17,9 +15,7 @@ define i32 @and_umax_muchless(i32 %A) { ; CHECK-LABEL: @and_umax_muchless( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 12 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 12 -; CHECK-NEXT: [[X:%.*]] = and i32 [[L1]], -32 +; CHECK-NEXT: [[X:%.*]] = and i32 [[A:%.*]], -32 ; CHECK-NEXT: ret i32 [[X]] ; %l0 = icmp ugt i32 12, %A @@ -43,9 +39,7 @@ define i32 @shr_umax(i32 %A) { ; CHECK-LABEL: @shr_umax( -; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], 15 -; CHECK-NEXT: [[L1:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 15 -; CHECK-NEXT: [[X:%.*]] = lshr i32 [[L1]], 4 +; CHECK-NEXT: [[X:%.*]] = lshr i32 [[A:%.*]], 4 ; CHECK-NEXT: ret i32 [[X]] ; %l0 = icmp ugt i32 15, %A