Index: lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -31,6 +31,8 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" @@ -323,11 +325,119 @@ return Changed; } +// See if we can prove that the given overflow intrinsic will not overflow. +static bool canRemoveOverflowCheck(IntrinsicInst *II, LazyValueInfo *LVI) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + break; + } + auto Holds = [&] (unsigned Pred, Value *V, Constant *C) { + return LVI->getPredicateAt(Pred, V, C, II) == LazyValueInfo::True; + }; + Value *LHS = II->getOperand(0); + Value *RHS = II->getOperand(1); + IntegerType *RHSType = cast(RHS->getType()); + APInt UMax = APInt::getMaxValue(RHSType->getBitWidth()); + APInt SMax = APInt::getSignedMaxValue(RHSType->getBitWidth()); + APInt SMin = APInt::getSignedMinValue(RHSType->getBitWidth()); + if (ConstantInt *RHSConstant = dyn_cast(RHS)) + if (RHSConstant->isZero()) + return true; + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::uadd_with_overflow: + if (ConstantInt *LHSConstant = dyn_cast(LHS)) + if (LHSConstant->isZero()) + return true; + if (ConstantInt *RHSConstant = dyn_cast(RHS)) + if (Holds(ICmpInst::ICMP_ULE, LHS, + ConstantInt::get(RHSType, UMax - RHSConstant->getValue()))) + return true; + break; + case Intrinsic::usub_with_overflow: + if (ConstantInt *LHSConstant = dyn_cast(LHS)) + if (LHSConstant->isMaxValue(false)) + return true; + if (ConstantInt *RHSConstant = dyn_cast(RHS)) + if (Holds(ICmpInst::ICMP_UGE, LHS, RHSConstant)) + return true; + break; + case Intrinsic::sadd_with_overflow: + if (ConstantInt *LHSConstant = dyn_cast(LHS)) + if (LHSConstant->isZero() || + (LHSConstant->isMinValue(true) && + Holds(ICmpInst::ICMP_SGE, RHS, ConstantInt::get(RHSType, 0))) || + (LHSConstant->isMaxValue(true) && + Holds(ICmpInst::ICMP_SLE, RHS, ConstantInt::get(RHSType, 0)))) + return true; + if (ConstantInt *RHSConstant = dyn_cast(RHS)) + if ((RHSConstant->getValue().isStrictlyPositive() && + Holds(ICmpInst::ICMP_SLE, LHS, + ConstantInt::get(RHSType, SMax - RHSConstant->getValue()))) || + (RHSConstant->isNegative() && + Holds(ICmpInst::ICMP_SGE, LHS, + ConstantInt::get(RHSType, SMin - RHSConstant->getValue())))) + return true; + break; + case Intrinsic::ssub_with_overflow: + if (ConstantInt *LHSConstant = dyn_cast(LHS)) + if ((LHSConstant->isMinValue(true) && + Holds(ICmpInst::ICMP_SLE, RHS, ConstantInt::get(RHSType, 0))) || + (LHSConstant->isMaxValue(true) && + Holds(ICmpInst::ICMP_SGE, RHS, ConstantInt::get(RHSType, 0)))) + return true; + if (ConstantInt *RHSConstant = dyn_cast(RHS)) + if ((RHSConstant->getValue().isStrictlyPositive() && + Holds(ICmpInst::ICMP_SGE, LHS, + ConstantInt::get(RHSType, SMin + RHSConstant->getValue()))) || + (RHSConstant->isNegative() && + Holds(ICmpInst::ICMP_SLE, LHS, + ConstantInt::get(RHSType, SMax + RHSConstant->getValue())))) + return true; + break; + } + return false; +} + +static void processOverflowIntrinsic(IntrinsicInst *II) { + Value *NewOp = nullptr; + switch (II->getIntrinsicID()) { + default: + llvm_unreachable("Illegal instruction."); + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + NewOp = BinaryOperator::CreateAdd(II->getOperand(0), II->getOperand(1), + II->getName(), II); + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + NewOp = BinaryOperator::CreateSub(II->getOperand(0), II->getOperand(1), + II->getName(), II); + } + IRBuilder<> B(II); + Value *NewI = B.CreateInsertValue(UndefValue::get(II->getType()), NewOp, 0); + NewI = B.CreateInsertValue(NewI, ConstantInt::getFalse(II->getContext()), 1); + II->replaceAllUsesWith(NewI); + II->eraseFromParent(); +} + /// Infer nonnull attributes for the arguments at the specified callsite. static bool processCallSite(CallSite CS, LazyValueInfo *LVI) { SmallVector ArgNos; unsigned ArgNo = 0; + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + if (canRemoveOverflowCheck(II, LVI)) { + processOverflowIntrinsic(II); + return true; + } + } + for (Value *V : CS.args()) { PointerType *Type = dyn_cast(V->getType()); // Try to mark pointer typed parameters as non-null. We skip the Index: test/Transforms/CorrelatedValuePropagation/overflows.ll =================================================================== --- /dev/null +++ test/Transforms/CorrelatedValuePropagation/overflows.ll @@ -0,0 +1,368 @@ +; RUN: opt -S -correlated-propagation < %s | FileCheck %s + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) + +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) + +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) + +declare void @llvm.trap() + + +define i32 @signed_add(i32 %x, i32 %y) { +; CHECK: define i32 @signed_add +; CHECK-NOT: @llvm.ssub.with.overflow.i32 +; CHECK: @llvm.sadd.with.overflow.i32 +entry: + %cmp = icmp sgt i32 %y, 0 + br i1 %cmp, label %land.lhs.true, label %lor.lhs.false + +land.lhs.true: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 2147483647, i32 %y) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: ; preds = %land.lhs.true, %land.lhs.true3, %cond.false + tail call void @llvm.trap() + unreachable + +cont: ; preds = %land.lhs.true + %2 = extractvalue { i32, i1 } %0, 0 + %cmp1 = icmp slt i32 %2, %x + br i1 %cmp1, label %cond.end, label %cond.false + +lor.lhs.false: ; preds = %entry + %cmp2 = icmp slt i32 %y, 0 + br i1 %cmp2, label %land.lhs.true3, label %cond.false + +land.lhs.true3: ; preds = %lor.lhs.false + %3 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 -2147483648, i32 %y) + %4 = extractvalue { i32, i1 } %3, 1 + br i1 %4, label %trap, label %cont4 + +cont4: ; preds = %land.lhs.true3 + %5 = extractvalue { i32, i1 } %3, 0 + %cmp5 = icmp sgt i32 %5, %x + br i1 %cmp5, label %cond.end, label %cond.false + +cond.false: ; preds = %cont, %cont4, %lor.lhs.false + %6 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y) + %7 = extractvalue { i32, i1 } %6, 0 + %8 = extractvalue { i32, i1 } %6, 1 + br i1 %8, label %trap, label %cond.end + +cond.end: ; preds = %cond.false, %cont, %cont4 + %cond = phi i32 [ 0, %cont4 ], [ 0, %cont ], [ %7, %cond.false ] + ret i32 %cond +} + +define i32 @unsigned_add(i32 %x, i32 %y) { +; CHECK: define i32 @unsigned_add +; CHECK-NOT: @llvm.usub.with.overflow.i32 +; CHECK: @llvm.uadd.with.overflow.i32 +entry: + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 -1, i32 %y) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: ; preds = %cond.false, %entry + tail call void @llvm.trap() + unreachable + +cont: ; preds = %entry + %2 = extractvalue { i32, i1 } %0, 0 + %cmp1 = icmp ult i32 %2, %x + br i1 %cmp1, label %cond.end, label %cond.false + +cond.false: ; preds = %cont + %3 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) + %4 = extractvalue { i32, i1 } %3, 0 + %5 = extractvalue { i32, i1 } %3, 1 + br i1 %5, label %trap, label %cond.end + +cond.end: ; preds = %cond.false, %cont + %cond = phi i32 [ 0, %cont ], [ %4, %cond.false ] + ret i32 %cond +} + +define i32 @signed_sub(i32 %x, i32 %y) { +; CHECK: define i32 @signed_sub +; CHECK-NOT: @llvm.sadd.with.overflow.i32 +; CHECK: @llvm.ssub.with.overflow.i32 +entry: + %cmp = icmp slt i32 %y, 0 + br i1 %cmp, label %land.lhs.true, label %lor.lhs.false + +land.lhs.true: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %y, i32 2147483647) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: ; preds = %land.lhs.true, %land.lhs.true3, %cond.false + tail call void @llvm.trap() + unreachable + +cont: ; preds = %land.lhs.true + %2 = extractvalue { i32, i1 } %0, 0 + %cmp1 = icmp slt i32 %2, %x + br i1 %cmp1, label %cond.end, label %cond.false + +lor.lhs.false: ; preds = %entry + %cmp2 = icmp eq i32 %y, 0 + br i1 %cmp2, label %cond.false, label %land.lhs.true3 + +land.lhs.true3: ; preds = %lor.lhs.false + %3 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %y, i32 -2147483648) + %4 = extractvalue { i32, i1 } %3, 1 + br i1 %4, label %trap, label %cont4 + +cont4: ; preds = %land.lhs.true3 + %5 = extractvalue { i32, i1 } %3, 0 + %cmp5 = icmp sgt i32 %5, %x + br i1 %cmp5, label %cond.end, label %cond.false + +cond.false: ; preds = %lor.lhs.false, %cont, %cont4 + %6 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y) + %7 = extractvalue { i32, i1 } %6, 0 + %8 = extractvalue { i32, i1 } %6, 1 + br i1 %8, label %trap, label %cond.end + +cond.end: ; preds = %cond.false, %cont, %cont4 + %cond = phi i32 [ 0, %cont4 ], [ 0, %cont ], [ %7, %cond.false ] + ret i32 %cond +} + +define i32 @unsigned_sub(i32 %x, i32 %y) { +; CHECK: define i32 @unsigned_sub +; CHECK: @llvm.usub.with.overflow.i32 +entry: + %cmp = icmp ult i32 %x, %y + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +define i32 @signed_add_r1(i32 %x) { +; CHECK: define i32 @signed_add_r1 +; CHECK-NOT: @llvm.sadd.with.overflow.i32 +entry: + %cmp = icmp eq i32 %x, 2147483647 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +define i32 @unsigned_add_r1(i32 %x) { +; CHECK: define i32 @unsigned_add_r1 +; CHECK-NOT: @llvm.uadd.with.overflow.i32 +entry: + %cmp = icmp eq i32 %x, -1 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +define i32 @signed_sub_r1(i32 %x) { +; CHECK: define i32 @signed_sub_r1 +; CHECK-NOT: @llvm.ssub.with.overflow.i32 +entry: + %cmp = icmp eq i32 %x, -2147483648 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +define i32 @unsigned_sub_r1(i32 %x) { +; CHECK: define i32 @unsigned_sub_r1 +; CHECK-NOT: @llvm.usub.with.overflow.i32 +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +define i32 @signed_add_rn1(i32 %x) { +; CHECK: define i32 @signed_add_rn1 +; CHECK-NOT: @llvm.sadd.with.overflow.i32 +entry: + %cmp = icmp eq i32 %x, -2147483648 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 -1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +define i32 @signed_sub_rn1(i32 %x) { +; CHECK: define i32 @signed_sub_rn1 +; CHECK-NOT: @llvm.ssub.with.overflow.i32 +entry: + %cmp = icmp eq i32 %x, 2147483647 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 -1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: ; preds = %cond.false + tail call void @llvm.trap() + unreachable + +cond.end: ; preds = %cond.false, %entry + %cond = phi i32 [ 0, %entry ], [ %1, %cond.false ] + ret i32 %cond +} + +declare i32 @bar(i32) + +define void @unsigned_loop(i32 %i) { +; CHECK: define void @unsigned_loop +; CHECK-NOT: @llvm.usub.with.overflow.i32 +entry: + %cmp3 = icmp eq i32 %i, 0 + br i1 %cmp3, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %cont + %i.addr.04 = phi i32 [ %2, %cont ], [ %i, %while.body.preheader ] + %call = tail call i32 @bar(i32 %i.addr.04) + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.addr.04, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: ; preds = %while.body + tail call void @llvm.trap() + unreachable + +cont: ; preds = %while.body + %2 = extractvalue { i32, i1 } %0, 0 + %cmp = icmp eq i32 %2, 0 + br i1 %cmp, label %while.end, label %while.body + +while.end: ; preds = %cont, %entry + ret void +} + +define void @intrinsic_into_phi(i32 %n) { +; CHECK: @intrinsic_into_phi(i32 %n) { +; CHECK: @llvm.sadd.with.overflow.i32 +; CHECK-NOT: @llvm.sadd.with.overflow.i32 +entry: + br label %cont + +for.cond: ; preds = %while.end + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %.lcssa, i32 1) + %1 = extractvalue { i32, i1 } %0, 1 + br i1 %1, label %trap, label %cont + +trap: ; preds = %for.cond, %while.body + tail call void @llvm.trap() + unreachable + +cont: ; preds = %entry, %for.cond + %2 = phi { i32, i1 } [ zeroinitializer, %entry ], [ %0, %for.cond ] + %3 = extractvalue { i32, i1 } %2, 0 + %call9 = tail call i32 @bar(i32 %3) + %tobool10 = icmp eq i32 %call9, 0 + br i1 %tobool10, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %cont + br label %while.body + +while.cond: ; preds = %while.body + %4 = extractvalue { i32, i1 } %6, 0 + %call = tail call i32 @bar(i32 %4) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %while.end, label %while.body + +while.body: ; preds = %while.body.preheader, %while.cond + %5 = phi i32 [ %4, %while.cond ], [ %3, %while.body.preheader ] + %6 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %5, i32 1) + %7 = extractvalue { i32, i1 } %6, 1 + br i1 %7, label %trap, label %while.cond + +while.end: ; preds = %while.cond, %cont + %.lcssa = phi i32 [ %3, %cont ], [ %4, %while.cond ] + %cmp = icmp slt i32 %.lcssa, %n + br i1 %cmp, label %for.cond, label %cleanup2 + +cleanup2: ; preds = %while.end + ret void +}