diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2651,7 +2651,35 @@
   return nullptr;
 }
 
-Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
+static bool CanFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) {
+  // Look at every user of V.
+  for (User *U : V->users()) {
+    if (U == IgnoredUser)
+      continue; // Don't consider this user.
+
+    auto *I = cast<Instruction>(U);
+    switch (I->getOpcode()) {
+    case Instruction::Select:
+    case Instruction::Br:
+      break; // Free to invert by swapping true/false values/destinations.
+    case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring it.
+      if (!match(I, m_Not(m_Value())))
+        return false; // Not a 'not'.
+      break;
+    default:
+      return false; // Don't know, likely not freely invertible.
+    }
+    // So far all users were free to invert...
+  }
+  return true; // Can freely invert all users!
+}
+
+Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+                                    BinaryOperator &I) {
+  assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
+         I.getOperand(1) == RHS && "Should be 'xor' with these operands");
+
   if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
     if (LHS->getOperand(0) == RHS->getOperand(1) &&
         LHS->getOperand(1) == RHS->getOperand(0))
@@ -2706,14 +2734,35 @@
     // TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
     if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
       // TODO: Independently handle cases where the 'and' side is a constant.
-      if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
-        // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
-        RHS->setPredicate(RHS->getInversePredicate());
-        return Builder.CreateAnd(LHS, RHS);
+      ICmpInst *X = nullptr, *Y = nullptr;
+      if (OrICmp == LHS && AndICmp == RHS) {
+        // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
+        X = LHS;
+        Y = RHS;
       }
-      if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
-        // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
-        LHS->setPredicate(LHS->getInversePredicate());
+      if (OrICmp == RHS && AndICmp == LHS) {
+        // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
+        X = RHS;
+        Y = LHS;
+      }
+      if (X && Y && (Y->hasOneUse() || CanFreelyInvertAllUsersOf(Y, &I))) {
+        // Invert the predicate of 'Y', thus inverting its output.
+        Y->setPredicate(Y->getInversePredicate());
+        // So, are there other uses of Y?
+        if (!Y->hasOneUse()) {
+          // We need to adapt other uses of Y though. Get a value that matches
+          // the original value of Y before inversion. While this increases
+          // immediate instruction count, we have just ensured that all the
+          // users are freely-invertible, so that 'not' *will* get folded away.
+          BuilderTy::InsertPointGuard Guard(Builder);
+          // Set insertion point to right after the Y.
+          Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator()));
+          Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+          // Replace all uses of Y (excluding the one in NotY!) with NotY.
+          Y->replaceUsesWithIf(NotY,
+                               [NotY](Use &U) { return U.getUser() != NotY; });
+        }
+        // All done.
         return Builder.CreateAnd(LHS, RHS);
       }
     }
@@ -3038,7 +3087,7 @@
 
   if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
     if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
-      if (Value *V = foldXorOfICmps(LHS, RHS))
+      if (Value *V = foldXorOfICmps(LHS, RHS, I))
         return replaceInstUsesWith(I, V);
 
   if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -540,7 +540,7 @@
   Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
   Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
-  Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+  Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &I);
 
   /// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
   /// NOTE: Unlike most of instcombine, this returns a Value which should
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll
--- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-with-select-of-constant-threshold-pattern.ll
@@ -77,11 +77,11 @@
 
 define i32 @t4_select_cond_xor_v0(i32 %X) {
 ; CHECK-LABEL: @t4_select_cond_xor_v0(
-; CHECK-NEXT:    [[NEED_TO_CLAMP_POSITIVE:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[DONT_NEED_TO_CLAMP_NEGATIVE:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_POSITIVE]], i32 32767, i32 -32768
-; CHECK-NEXT:    [[DONT_NEED_TO_CLAMP:%.*]] = xor i1 [[NEED_TO_CLAMP_POSITIVE]], [[DONT_NEED_TO_CLAMP_NEGATIVE]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[DONT_NEED_TO_CLAMP]], i32 [[X]], i32 [[CLAMP_LIMIT]]
+; CHECK-NEXT:    [[NEED_TO_CLAMP_POSITIVE:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_POSITIVE]], i32 -32768, i32 32767
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[CLAMP_LIMIT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %need_to_clamp_positive = icmp sgt i32 %X, 32767
@@ -110,11 +110,11 @@
 
 define i32 @t5_select_cond_xor_v2(i32 %X) {
 ; CHECK-LABEL: @t5_select_cond_xor_v2(
-; CHECK-NEXT:    [[DONT_NEED_TO_CLAMP_POSITIVE:%.*]] = icmp slt i32 [[X:%.*]], 32768
-; CHECK-NEXT:    [[NEED_TO_CLAMP_NEGATIVE:%.*]] = icmp slt i32 [[X]], -32767
-; CHECK-NEXT:    [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_NEGATIVE]], i32 -32768, i32 32767
-; CHECK-NEXT:    [[DONT_NEED_TO_CLAMP:%.*]] = xor i1 [[DONT_NEED_TO_CLAMP_POSITIVE]], [[NEED_TO_CLAMP_NEGATIVE]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[DONT_NEED_TO_CLAMP]], i32 [[X]], i32 [[CLAMP_LIMIT]]
+; CHECK-NEXT:    [[NEED_TO_CLAMP_NEGATIVE:%.*]] = icmp sgt i32 [[X:%.*]], -32768
+; CHECK-NEXT:    [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_NEGATIVE]], i32 32767, i32 -32768
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[CLAMP_LIMIT]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %dont_need_to_clamp_positive = icmp sle i32 %X, 32767
diff --git a/llvm/test/Transforms/InstCombine/xor-of-icmps-with-extra-uses.ll b/llvm/test/Transforms/InstCombine/xor-of-icmps-with-extra-uses.ll
--- a/llvm/test/Transforms/InstCombine/xor-of-icmps-with-extra-uses.ll
+++ b/llvm/test/Transforms/InstCombine/xor-of-icmps-with-extra-uses.ll
@@ -7,12 +7,12 @@
 ; %cond0 is extra-used in select, which is freely invertible.
 define i1 @v0_select_of_consts(i32 %X, i32* %selected) {
 ; CHECK-LABEL: @v0_select_of_consts(
-; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 -32768
+; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 -32768, i32 32767
 ; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %cond0 = icmp sgt i32 %X, 32767
   %cond1 = icmp sgt i32 %X, -32768
@@ -23,12 +23,12 @@
 }
 define i1 @v1_select_of_var_and_const(i32 %X, i32 %Y, i32* %selected) {
 ; CHECK-LABEL: @v1_select_of_var_and_const(
-; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 -32768
+; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 -32768, i32 [[Y:%.*]]
 ; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %cond0 = icmp sgt i32 %X, 32767
   %cond1 = icmp sgt i32 %X, -32768
@@ -39,12 +39,12 @@
 }
 define i1 @v2_select_of_const_and_var(i32 %X, i32 %Y, i32* %selected) {
 ; CHECK-LABEL: @v2_select_of_const_and_var(
-; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 [[Y:%.*]]
+; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 32767
 ; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %cond0 = icmp sgt i32 %X, 32767
   %cond1 = icmp sgt i32 %X, -32768
@@ -58,9 +58,8 @@
 define i1 @v3_branch(i32 %X, i32* %dst0, i32* %dst1) {
 ; CHECK-LABEL: @v3_branch(
 ; CHECK-NEXT:  begin:
-; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    br i1 [[COND0]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    br i1 [[COND0]], label [[BB1:%.*]], label [[BB0:%.*]]
 ; CHECK:       bb0:
 ; CHECK-NEXT:    store i32 0, i32* [[DST0:%.*]], align 4
 ; CHECK-NEXT:    br label [[END:%.*]]
@@ -68,8 +67,9 @@
 ; CHECK-NEXT:    store i32 0, i32* [[DST1:%.*]], align 4
 ; CHECK-NEXT:    br label [[END]]
 ; CHECK:       end:
-; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    ret i1 [[TMP0]]
 ;
 begin:
   %cond0 = icmp sgt i32 %X, 32767
@@ -89,12 +89,11 @@
 ; Can invert 'not'.
 define i1 @v4_not_store(i32 %X, i1* %not_cond) {
 ; CHECK-LABEL: @v4_not_store(
-; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[NOT_COND0:%.*]] = xor i1 [[COND0]], true
-; CHECK-NEXT:    store i1 [[NOT_COND0]], i1* [[NOT_COND:%.*]], align 1
-; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %cond0 = icmp sgt i32 %X, 32767
   %not_cond0 = xor i1 %cond0, -1
@@ -108,14 +107,13 @@
 ; All extra uses are invertible.
 define i1 @v5_select_and_not(i32 %X, i32 %Y, i32* %selected, i1* %not_cond) {
 ; CHECK-LABEL: @v5_select_and_not(
-; CHECK-NEXT:    [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT:    [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 [[Y:%.*]]
-; CHECK-NEXT:    [[NOT_COND0:%.*]] = xor i1 [[COND0]], true
-; CHECK-NEXT:    store i1 [[NOT_COND0]], i1* [[NOT_COND:%.*]], align 1
+; CHECK-NEXT:    [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 32767
+; CHECK-NEXT:    store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
 ; CHECK-NEXT:    store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT:    [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT:    ret i1 [[RES]]
+; CHECK-NEXT:    [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
   %cond0 = icmp sgt i32 %X, 32767
   %cond1 = icmp sgt i32 %X, -32768
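
For illustration only (not part of the patch), this is roughly the IR-level effect of the new fold on the first test above, @v0_select_of_consts, as taken from its old and new CHECK lines; value names are illustrative. The extra use of %cond0 in the select is adapted by inverting the compare (sle 32767, canonicalized to slt 32768) and swapping the select arms, after which the xor of the two icmps collapses into a single unsigned range check.

  ; Before:
  %cond0 = icmp sgt i32 %X, 32767
  %cond1 = icmp sgt i32 %X, -32768
  %select = select i1 %cond0, i32 32767, i32 -32768
  store i32 %select, i32* %selected
  %res = xor i1 %cond0, %cond1
  ret i1 %res

  ; After (per the updated CHECK lines):
  %cond0 = icmp slt i32 %X, 32768                    ; predicate inverted
  %select = select i1 %cond0, i32 -32768, i32 32767  ; select arms swapped
  store i32 %select, i32* %selected
  %x.off = add i32 %X, 32767
  %r = icmp ult i32 %x.off, 65535                    ; single range check replaces the xor
  ret i1 %r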