Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5386,6 +5386,42 @@
   return nullptr;
 }
 
+/// Replace an icmp that re-derives unsigned-add overflow from the sum of a
+/// uadd.with.overflow call with the intrinsic's own overflow bit:
+///   extract(uadd.with.overflow(A, B), 0) ult A
+///    -> extract(uadd.with.overflow(A, B), 1)
+/// Also handles the swapped ugt form and the constant forms
+/// "A + 1 == 0" and "A + -1 != -1". Returns the new extractvalue
+/// instruction, or nullptr when \p I matches none of these shapes.
+static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
+  CmpInst::Predicate Pred = I.getPredicate();
+  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+  Value *UAddOv;
+  // A and B are bound to the call's operands whenever the pattern matches.
+  Value *A, *B;
+  auto UAddOvResultPat = m_ExtractValue<0>(
+      m_Intrinsic<Intrinsic::uadd_with_overflow>(m_Value(A), m_Value(B)));
+  if (match(Op0, UAddOvResultPat) &&
+      ((Pred == ICmpInst::ICMP_ULT && (Op1 == A || Op1 == B)) ||
+       (Pred == ICmpInst::ICMP_EQ && match(Op1, m_ZeroInt()) &&
+        (match(A, m_One()) || match(B, m_One()))) ||
+       (Pred == ICmpInst::ICMP_NE && match(Op1, m_AllOnes()) &&
+        (match(A, m_AllOnes()) || match(B, m_AllOnes())))))
+    // extract(uadd.with.overflow(A, B), 0) < A
+    // extract(uadd.with.overflow(A, 1), 0) == 0
+    // extract(uadd.with.overflow(A, -1), 0) != -1
+    UAddOv = cast<ExtractValueInst>(Op0)->getAggregateOperand();
+  else if (match(Op1, UAddOvResultPat) &&
+           Pred == ICmpInst::ICMP_UGT && (Op0 == A || Op0 == B))
+    // A > extract(uadd.with.overflow(A, B), 0)
+    UAddOv = cast<ExtractValueInst>(Op1)->getAggregateOperand();
+  else
+    return nullptr;
+
+  return ExtractValueInst::Create(UAddOv, 1);
+}
+
 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
   bool Changed = false;
   const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -5574,6 +5610,9 @@
   if (Instruction *Res = foldICmpEquality(I))
     return Res;
 
+  if (Instruction *Res = foldICmpOfUAddOv(I))
+    return Res;
+
   // The 'cmpxchg' instruction returns an aggregate containing the old value and
   // an i1 which indicates whether or not we successfully did the swap.
// Index: llvm/test/Transforms/InstCombine/with_overflow.ll =================================================================== --- llvm/test/Transforms/InstCombine/with_overflow.ll +++ llvm/test/Transforms/InstCombine/with_overflow.ll @@ -356,8 +356,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[X]] +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) @@ -373,8 +372,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]]) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], [[Y]] +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) @@ -391,8 +389,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X]], i32 [[Y:%.*]]) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[X]], [[C]] +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %x = urem i32 42, %xx ; Thwart complexity-based canonicalization @@ -410,8 +407,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y]]) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: 
[[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp ugt i32 [[Y]], [[C]] +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %y = urem i32 42, %yy ; Thwart complexity-based canonicalization @@ -428,8 +424,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 42) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp ult i32 [[C]], 42 +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 42) @@ -445,8 +440,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 1) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1) @@ -462,8 +456,7 @@ ; CHECK-NEXT: [[A:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 -1) ; CHECK-NEXT: [[B:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: store i1 [[B]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[C:%.*]] = extractvalue { i32, i1 } [[A]], 0 -; CHECK-NEXT: [[D:%.*]] = icmp ne i32 [[C]], -1 +; CHECK-NEXT: [[D:%.*]] = extractvalue { i32, i1 } [[A]], 1 ; CHECK-NEXT: ret i1 [[D]] ; %a = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 -1)