Index: llvm/trunk/include/llvm/IR/PatternMatch.h
===================================================================
--- llvm/trunk/include/llvm/IR/PatternMatch.h
+++ llvm/trunk/include/llvm/IR/PatternMatch.h
@@ -295,6 +295,9 @@
 /// \brief Match a value, capturing it if we match.
 inline bind_ty<Value> m_Value(Value *&V) { return V; }
 
+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
 /// \brief Match a binary operator, capturing it if we match.
 inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
 
@@ -1103,6 +1106,52 @@
   return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
 }
 
+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+  LHS_t L;
+  RHS_t R;
+  Sum_t S;
+
+  UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+      : L(L), R(R), S(S) {}
+
+  template <typename OpTy> bool match(OpTy *V) {
+    Value *ICmpLHS, *ICmpRHS;
+    ICmpInst::Predicate Pred;
+    if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+      return false;
+
+    Value *AddLHS, *AddRHS;
+    auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+    // (a + b) u< a, (a + b) u< b
+    if (Pred == ICmpInst::ICMP_ULT)
+      if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+    // a >u (a + b), b >u (a + b)
+    if (Pred == ICmpInst::ICMP_UGT)
+      if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+    return false;
+  }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+  return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
 /// \brief Match an 'unordered' floating point minimum function.
 /// Floating point has one special value 'NaN'. Therefore, there is no total
 /// order. However, if we can ignore the 'NaN' value (for example, because of a
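
For reference, here is a minimal sketch of how a client uses the new matcher, mirroring its use in CodeGenPrepare below; the helper name isUAddOverflowCheck is hypothetical and not part of the patch:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true if V is an icmp implementing an unsigned-add overflow check
// in any of the four recognized forms: (a + b) u< a, (a + b) u< b,
// a >u (a + b), or b >u (a + b).
static bool isUAddOverflowCheck(Value *V) {
  Value *A, *B;      // bind to the addition's operands
  Instruction *AddI; // binds to the add instruction itself
  return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B),
                                     m_Instruction(AddI)));
}
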
Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
@@ -747,13 +747,60 @@
   return SinkCast(CI);
 }
 
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - Try to combine CI into a call to the
+/// llvm.uadd.with.overflow intrinsic if possible.
+///
+/// Return true if any changes were made.
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+  Value *A, *B;
+  Instruction *AddI;
+  if (!match(CI,
+             m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+    return false;
+
+  Type *Ty = AddI->getType();
+  if (!isa<IntegerType>(Ty))
+    return false;
+
+  // We don't want to move around uses of condition values this late, so we
+  // check if it is legal to create the call to the intrinsic in the basic
+  // block containing the icmp:
+
+  if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+    return false;
+
+#ifndef NDEBUG
+  // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+  // for now:
+  if (AddI->hasOneUse())
+    assert(*AddI->user_begin() == CI && "expected!");
+#endif
+
+  Module *M = CI->getParent()->getParent()->getParent();
+  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
+  auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
+  auto *UAddWithOverflow =
+      CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+  auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+  auto *Overflow =
+      ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+  CI->replaceAllUsesWith(Overflow);
+  AddI->replaceAllUsesWith(UAdd);
+  CI->eraseFromParent();
+  AddI->eraseFromParent();
+  return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
 /// the number of virtual registers that must be created and coalesced. This is
 /// a clear win except on targets with multiple condition code registers
 /// (PowerPC), where it might lose; some adjustment may be wanted there.
 ///
 /// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
   BasicBlock *DefBB = CI->getParent();
 
   /// InsertedCmp - Only insert a cmp in each block once.
@@ -802,6 +849,16 @@
   return MadeChange;
 }
 
+static bool OptimizeCmpExpression(CmpInst *CI) {
+  if (SinkCmpExpression(CI))
+    return true;
+
+  if (CombineUAddWithOverflow(CI))
+    return true;
+
+  return false;
+}
+
 /// isExtractBitsCandidateUse - Check if the candidates could
 /// be combined with shift instruction, which includes:
 /// 1. Truncate instruction
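
A condensed sketch of the rewrite CombineUAddWithOverflow performs, assuming A, B, AddI, CI, Ty, and M are bound as in the function above (this restates the patch's code, it is not additional API):

// For an i64 addition, the matched pair
//   %add = add i64 %a, %b
//   %cmp = icmp ult i64 %add, %a
// is replaced by
//   %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %uadd = extractvalue { i64, i1 } %uadd.overflow, 0
//   %overflow = extractvalue { i64, i1 } %uadd.overflow, 1
//
// If the add has no other users, everything is created at the icmp, which
// effectively sinks the addition into the compare's block; otherwise the
// call is created where the add was, so %uadd dominates the add's users.
Instruction *InsertPt = AddI->hasOneUse() ? CI : AddI;
Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
auto *Call = CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
auto *Sum = ExtractValueInst::Create(Call, 0, "uadd", InsertPt);
auto *Ov = ExtractValueInst::Create(Call, 1, "overflow", InsertPt);
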
Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2109,35 +2109,6 @@
   return ExtractValueInst::Create(Call, 1, "sadd.overflow");
 }
 
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
-                                     InstCombiner &IC) {
-  // Don't bother doing this transformation for pointers, don't do it for
-  // vectors.
-  if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
-
-  // If the add is a constant expr, then we don't bother transforming it.
-  Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
-  if (!OrigAdd) return nullptr;
-
-  Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
-  // Put the new code above the original add, in case there are any uses of the
-  // add between the add and the compare.
-  InstCombiner::BuilderTy *Builder = IC.Builder;
-  Builder->SetInsertPoint(OrigAdd);
-
-  Module *M = I.getParent()->getParent()->getParent();
-  Type *Ty = LHS->getType();
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-  CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
-  Value *Add = Builder->CreateExtractValue(Call, 0);
-
-  IC.ReplaceInstUsesWith(*OrigAdd, Add);
-
-  // The original icmp gets replaced with the overflow value.
-  return ExtractValueInst::Create(Call, 1, "uadd.overflow");
-}
-
 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
                                          Value *RHS, Instruction &OrigI,
                                          Value *&Result, Constant *&Overflow) {
@@ -3539,21 +3510,18 @@
     return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
   }
 
-  // (a+b) <u a  --> llvm.uadd.with.overflow.
-  // (a+b) <u b  --> llvm.uadd.with.overflow.
-  if (I.getPredicate() == ICmpInst::ICMP_ULT &&
-      match(Op0, m_Add(m_Value(A), m_Value(B))) &&
-      (Op1 == A || Op1 == B))
-    if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
-      return R;
-
-  // a >u (a+b)  --> llvm.uadd.with.overflow.
-  // b >u (a+b)  --> llvm.uadd.with.overflow.
-  if (I.getPredicate() == ICmpInst::ICMP_UGT &&
-      match(Op1, m_Add(m_Value(A), m_Value(B))) &&
-      (Op0 == A || Op0 == B))
-    if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
-      return R;
+  Instruction *AddI = nullptr;
+  if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+                                   m_Instruction(AddI))) &&
+      isa<IntegerType>(A->getType())) {
+    Value *Result;
+    Constant *Overflow;
+    if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+                              Overflow)) {
+      ReplaceInstUsesWith(*AddI, Result);
+      return ReplaceInstUsesWith(I, Overflow);
+    }
+  }
 
   // (zext a) * (zext b)  --> llvm.umul.with.overflow.
   if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
Index: llvm/trunk/test/CodeGen/X86/add-of-carry.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/add-of-carry.ll
+++ llvm/trunk/test/CodeGen/X86/add-of-carry.ll
@@ -4,43 +4,26 @@
 define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
-  %add4 = add i32 %x, %sum
-  %cmp = icmp ult i32 %add4, %x
-  %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %add4, %inc
-  ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
 ; CHECK: movl
 ; CHECK-NEXT: addl
 ; CHECK-NEXT: adcl $0
 ; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
-  %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
-  %0 = extractvalue { i32, i1 } %uadd, 0
-  %cmp = extractvalue { i32, i1 } %uadd, 1
+  %add4 = add i32 %x, %sum
+  %cmp = icmp ult i32 %add4, %x
   %inc = zext i1 %cmp to i32
-  %z.0 = add i32 %0, %inc
+  %z.0 = add i32 %add4, %inc
   ret i32 %z.0
 }
 
 ;
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
 entry:
   %cmp = icmp ugt i32 %x, %y
   %dec = sext i1 %cmp to i32
   %dec.res = add nsw i32 %dec, %res
   ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
 ; CHECK: cmpl
 ; CHECK: sbbl
 ; CHECK: ret
 }
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
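
The new CodeGenPrepare tests below all exercise the same source-level idiom. A sketch of the C++ pattern whose lowering typically produces the matched IR (illustrative, not from the patch):

#include <cstdint>

// On unsigned operands, "sum < a" typically compiles to
// "icmp ult (a + b), a", exactly the form m_UAddWithOverflow recognizes;
// after this patch, CodeGenPrepare rewrites it into a single call to
// @llvm.uadd.with.overflow.
uint64_t checked_add(uint64_t a, uint64_t b, bool &overflow) {
  uint64_t sum = a + b;
  overflow = sum < a; // wrapped iff the true sum needs more than 64 bits
  return sum;
}
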
Index: llvm/trunk/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
+++ llvm/trunk/test/Transforms/CodeGenPrepare/overflow-intrinsics.ll
@@ -0,0 +1,74 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %a
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ult i64 %add, %b
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+  %add = add i64 %b, %a
+  store i64 %add, i64* %ptr
+  %cmp = icmp ugt i64 %b, %add
+  br i1 %c, label %next, label %exit
+
+ next:
+  %Q = select i1 %cmp, i64 %b, i64 42
+  ret i64 %Q
+
+ exit:
+  ret i64 0
+}
Index: llvm/trunk/test/Transforms/InstCombine/overflow.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/overflow.ll
+++ llvm/trunk/test/Transforms/InstCombine/overflow.ll
@@ -97,39 +97,6 @@
 ; CHECK: ret i8
 }
 
-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %a
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ult i64 %add, %b
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
-  %add = add i64 %b, %a
-  %cmp = icmp ugt i64 %b, %add
-  %Q = select i1 %cmp, i64 %b, i64 42
-  ret i64 %Q
-}
-
 ; CHECK-LABEL: @test8(
 ; PR11438
 ; This is @test1, but the operands are not sign-extended.  Make sure
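
Why @test5 in overflow-intrinsics.ll must stay untransformed (a reasoning sketch restating the guard in CombineUAddWithOverflow, not new code): the store gives the add a second user, and once SinkCmpExpression has sunk the icmp into %next, the add and the compare live in different blocks. Creating the intrinsic at the icmp would leave the store's operand undominated, while creating it at the add would reintroduce exactly the cross-block use of a condition value that the sinking just eliminated, so the transform bails out:

// From CombineUAddWithOverflow above; AddI is the add, CI the icmp.
if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
  return false;
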