diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h --- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h +++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h @@ -236,7 +236,8 @@ /// uses of V and only keep uses of ~V. /// /// See also: canFreelyInvertAllUsersOf() - static bool isFreeToInvert(Value *V, bool WillInvertAllUses) { + static bool isFreeToInvert(Value *V, bool WillInvertAllUses, + unsigned Depth = 0) { // ~(~(X)) -> X. if (match(V, m_Not(PatternMatch::m_Value()))) return true; @@ -245,32 +246,38 @@ if (match(V, PatternMatch::m_AnyIntegralConstant())) return true; + if (Depth++ >= MaxAnalysisRecursionDepth) + return false; + + // The rest of the cases require that we invert all uses so don't bother + // doing the analysis if we know we can't use the result. + if (!WillInvertAllUses) + return false; + // Compares can be inverted if all of their uses are being modified to use // the ~V. if (isa<CmpInst>(V)) - return WillInvertAllUses; - - // If `V` is of the form `A + Constant` then `-1 - V` can be folded into - // `(-1 - Constant) - A` if we are willing to invert all of the uses. - if (match(V, m_Add(PatternMatch::m_Value(), PatternMatch::m_ImmConstant()))) - return WillInvertAllUses; - - // If `V` is of the form `Constant - A` then `-1 - V` can be folded into - // `A + (-1 - Constant)` if we are willing to invert all of the uses. - if (match(V, m_Sub(PatternMatch::m_ImmConstant(), PatternMatch::m_Value()))) - return WillInvertAllUses; - - // Selects with invertible operands are freely invertible - if (match(V, - m_Select(PatternMatch::m_Value(), m_Not(PatternMatch::m_Value()), - m_Not(PatternMatch::m_Value())))) - return WillInvertAllUses; - - // Min/max may be in the form of intrinsics, so handle those identically - // to select patterns. 
- if (match(V, m_MaxOrMin(m_Not(PatternMatch::m_Value()), - m_Not(PatternMatch::m_Value())))) - return WillInvertAllUses; + return true; + + Value *A, *B; + // If `V` is of the form `A + B` then `-1 - V` can be folded into + // `~B - A` or `~A - B` if we are willing to invert all of the uses. + if (match(V, m_Add(PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) + return isFreeToInvert(A, A->hasOneUse(), Depth) || + isFreeToInvert(B, B->hasOneUse(), Depth); + + // If `V` is of the form `A - B` then `-1 - V` can be folded into + // `~A + B` if we are willing to invert all of the uses. + if (match(V, m_Sub(PatternMatch::m_Value(A), PatternMatch::m_Value()))) + return isFreeToInvert(A, A->hasOneUse(), Depth); + + // Selects/min/max with invertible operands are freely invertible + if (match(V, m_Select(PatternMatch::m_Value(), PatternMatch::m_Value(A), + PatternMatch::m_Value(B))) || + match(V, + m_MaxOrMin(PatternMatch::m_Value(A), PatternMatch::m_Value(B)))) + return isFreeToInvert(A, A->hasOneUse(), Depth) && + isFreeToInvert(B, B->hasOneUse(), Depth); return false; } diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -1494,14 +1494,13 @@ define i8 @sub_not_min_max(i8 %r, i8 %g, i8 %b) { ; CHECK-LABEL: @sub_not_min_max( -; CHECK-NEXT: [[NOTR:%.*]] = xor i8 [[R:%.*]], -1 ; CHECK-NEXT: [[NOTG:%.*]] = xor i8 [[G:%.*]], -1 ; CHECK-NEXT: call void @use(i8 [[NOTG]]) ; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1 ; CHECK-NEXT: call void @use(i8 [[NOTB]]) -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]]) -; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]]) -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 
@llvm.smax.i8(i8 [[TMP1]], i8 [[B]]) +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] ; CHECK-NEXT: ret i8 [[CK]] ; %notr = xor i8 %r, -1 @@ -1523,9 +1522,9 @@ ; CHECK-NEXT: call void @use(i8 [[NOTG]]) ; CHECK-NEXT: [[NOTB:%.*]] = xor i8 [[B:%.*]], -1 ; CHECK-NEXT: call void @use(i8 [[NOTB]]) -; CHECK-NEXT: [[M:%.*]] = call i8 @llvm.smin.i8(i8 [[NOTR]], i8 [[NOTG]]) -; CHECK-NEXT: [[K:%.*]] = call i8 @llvm.smin.i8(i8 [[M]], i8 [[NOTB]]) -; CHECK-NEXT: [[CK:%.*]] = sub i8 [[NOTR]], [[K]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[R]], i8 [[G]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 [[B]]) +; CHECK-NEXT: [[CK:%.*]] = sub i8 [[TMP2]], [[R]] ; CHECK-NEXT: ret i8 [[CK]] ; %notr = xor i8 %r, -1 diff --git a/llvm/test/Transforms/InstCombine/pr63791.ll b/llvm/test/Transforms/InstCombine/pr63791.ll --- a/llvm/test/Transforms/InstCombine/pr63791.ll +++ b/llvm/test/Transforms/InstCombine/pr63791.ll @@ -17,7 +17,7 @@ ; CHECK: for.cond5.preheader.i: ; CHECK-NEXT: br i1 false, label [[FOR_INC19_I:%.*]], label [[FOR_COND1_LOOPEXIT_I:%.*]] ; CHECK: for.inc19.i: -; CHECK-NEXT: br i1 false, label [[FOR_INC19_I]], label [[FOR_COND1_LOOPEXIT_I]] +; CHECK-NEXT: br i1 true, label [[FOR_COND1_LOOPEXIT_I]], label [[FOR_INC19_I]] ; entry: br label %for.cond.i diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -1294,7 +1294,7 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE6]] ] ; CHECK-NEXT: [[TMP0:%.*]] = or 
i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[INDEX]], 3 @@ -1355,23 +1355,24 @@ ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.continue6: ; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer -; CHECK-NEXT: [[TMP45:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP44]], <4 x i1> zeroinitializer -; CHECK-NEXT: [[TMP46:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP47:%.*]] = or <4 x i1> [[TMP45]], [[TMP46]] -; CHECK-NEXT: [[TMP48:%.*]] = bitcast <4 x i1> [[TMP47]] to i4 -; CHECK-NEXT: [[TMP49:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP48]]), !range [[RNG42:![0-9]+]] -; CHECK-NEXT: [[TMP50:%.*]] = zext i4 [[TMP49]] to i32 -; CHECK-NEXT: [[TMP51]] = add i32 [[VEC_PHI]], [[TMP50]] +; CHECK-NEXT: [[TMP44:%.*]] = icmp eq <4 x i32> [[TMP43]], zeroinitializer +; CHECK-NEXT: [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], <i1 true, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP45:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP44]] +; CHECK-NEXT: [[DOTNOT7:%.*]] = and <4 x i1> [[TMP45]], [[TMP19]] +; CHECK-NEXT: [[NOT__NOT7:%.*]] = xor <4 x i1> [[DOTNOT7]], <i1 true, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[TMP46:%.*]] = bitcast <4 x i1> [[NOT__NOT7]] to i4 +; CHECK-NEXT: [[TMP47:%.*]] = call i4 @llvm.ctpop.i4(i4 [[TMP46]]), !range [[RNG42:![0-9]+]] +; CHECK-NEXT: [[TMP48:%.*]] = zext i4 [[TMP47]] to i32 +; CHECK-NEXT: [[TMP49]] = add i32 [[VEC_PHI]], [[TMP48]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label 
[[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: -; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[A_1_LCSSA]] ; CHECK: for.body: ; CHECK-NEXT: br i1 poison, label [[LOR_LHS_FALSE:%.*]], label [[IF_THEN:%.*]]