diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1674,7 +1674,8 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Matchers for overflow check patterns: e.g. (a + b) u< a
+// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
 //===----------------------------------------------------------------------===//
 
 template <typename LHS_t, typename RHS_t, typename Sum_t>
@@ -1705,6 +1706,19 @@
     if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
       return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
 
+    Value *Op1;
+    auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
+    // (a ^ -1) <u b
+    if (Pred == ICmpInst::ICMP_ULT) {
+      if (XorExpr.match(ICmpLHS))
+        return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
+    }
+    //  b >u (a ^ -1)
+    if (Pred == ICmpInst::ICMP_UGT) {
+      if (XorExpr.match(ICmpRHS))
+        return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
+    }
+
     // Match special-case for increment-by-1.
     if (Pred == ICmpInst::ICMP_EQ) {
       // (a + 1) == 0
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -399,7 +399,8 @@
     bool simplifyOffsetableRelocate(Instruction &I);
     bool tryToSinkFreeOperands(Instruction *I);
-    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+    bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0,
+                                     Value *Arg1, CmpInst *Cmp,
                                      Intrinsic::ID IID);
     bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
     bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
@@ -1185,6 +1186,7 @@
 }
 
 bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+                                                 Value *Arg0, Value *Arg1,
                                                  CmpInst *Cmp,
                                                  Intrinsic::ID IID) {
   if (BO->getParent() != Cmp->getParent()) {
@@ -1202,8 +1204,6 @@
   }
 
   // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
-  Value *Arg0 = BO->getOperand(0);
-  Value *Arg1 = BO->getOperand(1);
   if (BO->getOpcode() == Instruction::Add &&
       IID == Intrinsic::usub_with_overflow) {
     assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
@@ -1222,12 +1222,16 @@
   IRBuilder<> Builder(InsertPt);
   Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
-  Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+  if (BO->getOpcode() != Instruction::Xor) {
+    Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+    BO->replaceAllUsesWith(Math);
+  } else
+    assert(BO->hasOneUse() &&
+           "Patterns with XOr should use the BO only in the compare");
   Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
-  BO->replaceAllUsesWith(Math);
   Cmp->replaceAllUsesWith(OV);
-  BO->eraseFromParent();
   Cmp->eraseFromParent();
+  BO->eraseFromParent();
   return true;
 }
@@ -1267,9 +1271,13 @@
                                                bool &ModifiedDT) {
   Value *A, *B;
   BinaryOperator *Add;
-  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+  if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
     if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
       return false;
+    // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
+    A = Add->getOperand(0);
+    B = Add->getOperand(1);
+  }
 
   if (!TLI->shouldFormOverflowOp(ISD::UADDO,
                                  TLI->getValueType(*DL, Add->getType()),
@@ -1282,7 +1290,8 @@
   if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
+                                   Intrinsic::uadd_with_overflow))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
@@ -1344,7 +1353,8 @@
       Sub->hasNUsesOrMore(2)))
     return false;
 
-  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+  if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
+                                   Cmp, Intrinsic::usub_with_overflow))
     return false;
 
   // Reset callers - do not crash by iterating over a dead instruction.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5568,8 +5568,11 @@
       isa<IntegerType>(A->getType())) {
     Value *Result;
     Constant *Overflow;
-    if (OptimizeOverflowCheck(Instruction::Add, /*Signed*/false, A, B,
-                              *AddI, Result, Overflow)) {
+    // m_UAddWithOverflow can match patterns that do not include an explicit
+    // "add" instruction, so check the opcode of the matched op.
+    if (AddI->getOpcode() == Instruction::Add &&
+        OptimizeOverflowCheck(Instruction::Add, /*Signed*/ false, A, B, *AddI,
+                              Result, Overflow)) {
       replaceInstUsesWith(*AddI, Result);
       return replaceInstUsesWith(I, Overflow);
     }
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
@@ -102,9 +102,9 @@
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %x = xor i64 %a, -1
@@ -115,13 +115,13 @@
 
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
-  %x = xor i64 -1, %a
-  %cmp = icmp ult i64 %x, %b
+  %x = xor i64 %a, -1
+  %cmp = icmp ugt i64 %b, %x
   %Q = select i1 %cmp, i64 %b, i64 42
   ret i64 %Q
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
--- a/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
@@ -153,9 +153,9 @@
 ; pattern as well.
 define i64 @uaddo6_xor(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 [[A:%.*]], -1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
   %x = xor i64 %a, -1
@@ -166,12 +166,12 @@
 define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
 ; CHECK-LABEL: @uaddo6_xor_commuted(
-; CHECK-NEXT:    [[X:%.*]] = xor i64 -1, [[A:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]]
-; CHECK-NEXT:    [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT:    [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
 ; CHECK-NEXT:    ret i64 [[Q]]
 ;
-  %x = xor i64 -1, %a
+  %x = xor i64 %a, -1
   %cmp = icmp ult i64 %x, %b
   %Q = select i1 %cmp, i64 %b, i64 42
   ret i64 %Q
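Reviewer note, not part of the patch: a minimal LLVM IR sketch of the identity the new matcher relies on, with illustrative function names. Since a ^ -1 is ~a, i.e. UINT64_MAX - a for an i64 value, the compare (a ^ -1) u< b holds exactly when a + b wraps, which is what the overflow bit of llvm.uadd.with.overflow reports; CodeGenPrepare rewrites the first form into the second when the target's shouldFormOverflowOp hook says the overflow op is profitable.

; Sketch only: the two functions below are equivalent for all inputs.
; %not_a equals UINT64_MAX - %a, so "%not_a u< %b" is true exactly when %a + %b wraps.
define i1 @overflow_check_via_xor(i64 %a, i64 %b) {
  %not_a = xor i64 %a, -1
  %ov = icmp ult i64 %not_a, %b
  ret i1 %ov
}

; The form CodeGenPrepare emits for the pattern above once the matcher fires.
define i1 @overflow_check_via_uaddo(i64 %a, i64 %b) {
  %res = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
  %ov = extractvalue { i64, i1 } %res, 1
  ret i1 %ov
}

declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)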