Index: include/llvm/Analysis/LazyValueInfo.h =================================================================== --- include/llvm/Analysis/LazyValueInfo.h +++ include/llvm/Analysis/LazyValueInfo.h @@ -23,6 +23,7 @@ class DataLayout; class DominatorTree; class Instruction; + class IntrinsicInst; class TargetLibraryInfo; class Value; @@ -70,6 +71,9 @@ Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI = nullptr); + // Return false if we can prove that overflow does not occur + bool mayOverflow(IntrinsicInst *I); + /// Inform the analysis cache that we have threaded an edge from /// PredBB to OldSucc to be from PredBB to NewSucc instead. void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); Index: include/llvm/IR/ConstantRange.h =================================================================== --- include/llvm/IR/ConstantRange.h +++ include/llvm/IR/ConstantRange.h @@ -289,6 +289,13 @@ /// ConstantRange inverse() const; + typedef ConstantRange + (ConstantRange::*ConstantRangeOp)(const ConstantRange &) const; + + // Return false if we can prove that overflow does not occur + static bool mayOverflow(const ConstantRange &L, const ConstantRange &R, + ConstantRangeOp Op, bool Signed); + /// Print out the bounds to a stream. /// void print(raw_ostream &OS) const; Index: lib/Analysis/LazyValueInfo.cpp =================================================================== --- lib/Analysis/LazyValueInfo.cpp +++ lib/Analysis/LazyValueInfo.cpp @@ -1412,6 +1412,64 @@ return nullptr; } +bool LazyValueInfo::mayOverflow(IntrinsicInst *I) { + BasicBlock *BB = I->getParent(); + const DataLayout &DL = BB->getModule()->getDataLayout(); + LVILatticeVal L = getCache(PImpl, AC, &DL, DT). + getValueInBlock(I->getOperand(0), BB, nullptr); + LVILatticeVal R = getCache(PImpl, AC, &DL, DT). + getValueInBlock(I->getOperand(1), BB, nullptr); + + ConstantRange LCR(1), RCR(1); + if (L.isUndefined() || R.isUndefined()) + return false; + if (L.isConstant()) { + if (ConstantInt *CI = dyn_cast(L.getConstant())) + LCR = ConstantRange(CI->getValue()); + else + return true; + } else { + if (L.isConstantRange()) + LCR = L.getConstantRange(); + else + return true; + } + if (R.isConstant()) { + if (ConstantInt *CI = dyn_cast(R.getConstant())) + RCR = ConstantRange(CI->getValue()); + else + return true; + } else { + if (R.isConstantRange()) + RCR = R.getConstantRange(); + else + return true; + } + + switch (I->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + return ConstantRange::mayOverflow(LCR, RCR, &ConstantRange::add, + /*signed=*/true); + case Intrinsic::ssub_with_overflow: + return ConstantRange::mayOverflow(LCR, RCR, &ConstantRange::sub, + /*signed=*/true); + case Intrinsic::smul_with_overflow: + return ConstantRange::mayOverflow(LCR, RCR, &ConstantRange::multiply, + /*signed=*/true); + case Intrinsic::uadd_with_overflow: + return ConstantRange::mayOverflow(LCR, RCR, &ConstantRange::add, + /*signed=*/false); + case Intrinsic::usub_with_overflow: + return ConstantRange::mayOverflow(LCR, RCR, &ConstantRange::sub, + /*signed=*/false); + case Intrinsic::umul_with_overflow: + return ConstantRange::mayOverflow(LCR, RCR, &ConstantRange::multiply, + /*signed=*/false); + default: + llvm_unreachable("expected overflow inst"); + } +} + static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, LVILatticeVal &Result, const DataLayout &DL, Index: lib/IR/ConstantRange.cpp =================================================================== --- lib/IR/ConstantRange.cpp +++ lib/IR/ConstantRange.cpp @@ -823,7 +823,7 @@ ConstantRange::lshr(const ConstantRange &Other) const { if (isEmptySet() || Other.isEmptySet()) return ConstantRange(getBitWidth(), /*isFullSet=*/false); - + APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()); APInt min = getUnsignedMin().lshr(Other.getUnsignedMax()); if (min == max + 1) @@ -840,6 +840,36 @@ return ConstantRange(Upper, Lower); } +// Return false if we can prove that overflow does not occur. The +// strategy is to do a wider version of the interval arithmetic and +// then check if the result has an empty intersection with this +// ConstantRange: +// +// orig INT_MIN orig INT_MAX +// | | +// v v +// --------------------U L-------------------- +// +bool ConstantRange::mayOverflow(const ConstantRange &L, const ConstantRange &R, + ConstantRangeOp Op, bool Signed) { + const unsigned Width = L.getBitWidth(); + APInt Max, Min; + ConstantRange LExt(1), RExt(1); + if (Signed) { + Max = APInt::getSignedMaxValue(Width).sext(2 * Width); + Min = APInt::getSignedMinValue(Width).sext(2 * Width); + LExt = L.signExtend(2 * Width); + RExt = R.signExtend(2 * Width); + } else { + Max = APInt::getMaxValue(Width).zext(2 * Width); + Min = APInt::getMinValue(Width).zext(2 * Width); + LExt = L.zeroExtend(2 * Width); + RExt = R.zeroExtend(2 * Width); + } + const ConstantRange OverflowRange(Max + 1, Min); + return !(LExt.*Op)(RExt).intersectWith(OverflowRange).isEmptySet(); +} + /// print - Print out the bounds to a stream... /// void ConstantRange::print(raw_ostream &OS) const { Index: lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -20,10 +20,12 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -36,6 +38,7 @@ STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); +STATISTIC(NumOverflows, "Number of overflow intrinsics eliminated"); namespace { class CorrelatedValuePropagation : public FunctionPass { @@ -47,6 +50,7 @@ bool processCmp(CmpInst *C); bool processSwitch(SwitchInst *SI); bool processCallSite(CallSite CS); + bool processIntrinsic(IntrinsicInst *II); bool processSDiv(BinaryOperator *SDI); /// Return a constant value for V usable at At and everything it @@ -339,6 +343,65 @@ return true; } +// copy and paste from InstCombineInternal.h +static Instruction *CreateOverflowTuple(IntrinsicInst *II, Value *Result, + Constant *Overflow) { + Constant *V[] = {UndefValue::get(Result->getType()), Overflow}; + StructType *ST = cast(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); + return InsertValueInst::Create(Struct, Result, 0); +} + +bool CorrelatedValuePropagation::processIntrinsic(IntrinsicInst *II) { + Instruction::BinaryOps Opcode; + bool NSW; + switch (II->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + Opcode = Instruction::Add; + NSW = true; + break; + case Intrinsic::ssub_with_overflow: + Opcode = Instruction::Sub; + NSW = true; + break; + case Intrinsic::smul_with_overflow: + Opcode = Instruction::Mul; + NSW = true; + break; + case Intrinsic::uadd_with_overflow: + Opcode = Instruction::Add; + NSW = false; + break; + case Intrinsic::usub_with_overflow: + Opcode = Instruction::Sub; + NSW = false; + break; + case Intrinsic::umul_with_overflow: + Opcode = Instruction::Mul; + NSW = false; + break; + default: + return false; + } + + if (LVI->mayOverflow(II)) + return false; + + BinaryOperator *NewInst = BinaryOperator::Create(Opcode, II->getOperand(0), + II->getOperand(1), "", II); + if (NSW) + NewInst->setHasNoSignedWrap(); + else + NewInst->setHasNoUnsignedWrap(); + auto &C = II->getParent()->getContext(); + Constant *OF = ConstantInt::getFalse(Type::getInt1Ty(C)); + Instruction *Tup = CreateOverflowTuple(II, NewInst, OF); + ReplaceInstWithInst(II, Tup); + NumOverflows++; + + return true; +} + /// See if LazyValueInfo's ability to exploit edge conditions, or range /// information is sufficient to prove the both operands of this SDiv are /// positive. If this is the case, replace the SDiv with a UDiv. Even for local @@ -426,6 +489,13 @@ BBChanged |= processMemAccess(II); break; case Instruction::Call: + if (IntrinsicInst *Intrinsic = dyn_cast(II)) { + bool result = processIntrinsic(Intrinsic); + BBChanged |= result; + if (result) + break; + } + // FALLTHROUGH case Instruction::Invoke: BBChanged |= processCallSite(CallSite(II)); break; Index: test/Transforms/CorrelatedValuePropagation/signed-overflow.ll =================================================================== --- test/Transforms/CorrelatedValuePropagation/signed-overflow.ll +++ test/Transforms/CorrelatedValuePropagation/signed-overflow.ll @@ -0,0 +1,192 @@ +; RUN: opt < %s -correlated-propagation -instcombine -simplifycfg -S | FileCheck %s + +; overflow is impossible in (x == INT_MAX) ? INT_MAX : x + 1 +; CHECK-LABEL: @foo1( +; CHECK-NOT: sadd.with.overflow +define i32 @foo1(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, 2147483647 + br i1 %cmp, label %cond.end, label %cond.true + +cond.true: + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ 2147483647, %entry ] + ret i32 %cond +} + +; overflow is possible in (x == INT_MAX) ? INT_MAX : x + 2 +; CHECK-LABEL: @foo2( +; CHECK: sadd.with.overflow +define i32 @foo2(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, 2147483647 + br i1 %cmp, label %cond.end, label %cond.true + +cond.true: + %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 2) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ 2147483647, %entry ] + ret i32 %cond +} + +; overflow is impossible in (x == INT_MIN) ? INT_MIN : x - 1 +; CHECK-LABEL: @foo3( +; CHECK-NOT: ssub.with.overflow +define i32 @foo3(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, -2147483648 + br i1 %cmp, label %cond.end, label %cond.true + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ -2147483648, %entry ] + ret i32 %cond +} + +; overflow is possible in (x == INT_MIN) ? INT_MIN : x - 2 +; CHECK-LABEL: @foo4( +; CHECK: ssub.with.overflow +define i32 @foo4(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, -2147483648 + br i1 %cmp, label %cond.end, label %cond.true + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 2) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ -2147483648, %entry ] + ret i32 %cond +} + +; overflow is impossible in x < (INT_MAX / 2 + 1) && x >= 0 ? x * 2 : x +; CHECK-LABEL: @foo5( +; CHECK-NOT: smul.with.overflow +define i32 @foo5(i32 %x) #0 { +entry: + %0 = icmp ult i32 %x, 1073741824 + br i1 %0, label %cond.true, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %1 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 2) + %2 = extractvalue { i32, i1 } %1, 0 + %3 = extractvalue { i32, i1 } %1, 1 + br i1 %3, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %2, %cond.true ], [ %x, %entry ] + ret i32 %cond +} + +; overflow is possible in x < (INT_MAX / 2 + 2) && x >= 0 ? x * 2 : x; +; CHECK-LABEL: @foo6( +; CHECK: smul.with.overflow +define i32 @foo6(i32 %x) #0 { +entry: + %0 = icmp ult i32 %x, 1073741825 + br i1 %0, label %cond.true, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %1 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 2) + %2 = extractvalue { i32, i1 } %1, 0 + %3 = extractvalue { i32, i1 } %1, 1 + br i1 %3, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %2, %cond.true ], [ %x, %entry ] + ret i32 %cond +} + +; overflow is impossible in x > (INT_MIN / 2 - 1) && x <= 0 ? x * 2 : x +; CHECK-LABEL: @foo7( +; CHECK-NOT: smul.with.overflow +define i32 @foo7(i32 %x) #0 { +entry: + %x.off = add i32 %x, 1073741824 + %0 = icmp ult i32 %x.off, 1073741825 + br i1 %0, label %cond.true, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %1 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 2) + %2 = extractvalue { i32, i1 } %1, 0 + %3 = extractvalue { i32, i1 } %1, 1 + br i1 %3, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %2, %cond.true ], [ %x, %entry ] + ret i32 %cond +} + +; overflow is possible in x > (INT_MIN / 2 - 2) && x <= 0 ? x * 2 : x +; CHECK-LABEL: @foo8( +; CHECK: smul.with.overflow +define i32 @foo8(i32 %x) #0 { +entry: + %x.off = add i32 %x, 1073741825 + %0 = icmp ult i32 %x.off, 1073741826 + br i1 %0, label %cond.true, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %1 = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %x, i32 2) + %2 = extractvalue { i32, i1 } %1, 0 + %3 = extractvalue { i32, i1 } %1, 1 + br i1 %3, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %2, %cond.true ], [ %x, %entry ] + ret i32 %cond +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) #1 +declare void @llvm.trap() #2 Index: test/Transforms/CorrelatedValuePropagation/unsigned-overflow.ll =================================================================== --- test/Transforms/CorrelatedValuePropagation/unsigned-overflow.ll +++ test/Transforms/CorrelatedValuePropagation/unsigned-overflow.ll @@ -0,0 +1,144 @@ +; RUN: opt < %s -correlated-propagation -instcombine -simplifycfg -S | FileCheck %s + +; overflow is impossible in (x == UINT_MAX) ? UINT_MAX : x + 1 +; CHECK-LABEL: @foo1( +; CHECK-NOT: uadd.with.overflow +define i32 @foo1(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, -1 + br i1 %cmp, label %cond.end, label %cond.true + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ -1, %entry ] + ret i32 %cond +} + +; overflow is possible in (x == UINT_MAX) ? UINT_MAX : x + 2 +; CHECK-LABEL: @foo2( +; CHECK: uadd.with.overflow +define i32 @foo2(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, -1 + br i1 %cmp, label %cond.end, label %cond.true + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 2) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ -1, %entry ] + ret i32 %cond +} + +; overflow is impossible in (x != 0) ? x - 1 : 0 +; CHECK-LABEL: @foo3( +; CHECK-NOT: usub.with.overflow +define i32 @foo3(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.end, label %cond.true + +cond.true: + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 1) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ 0, %entry ] + ret i32 %cond +} + +; overflow is possible in (x != 0) ? x - 2 : 0 +; CHECK-LABEL: @foo4( +; CHECK: usub.with.overflow +define i32 @foo4(i32 %x) #0 { +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %cond.end, label %cond.true + +cond.true: + %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 2) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ 0, %entry ] + ret i32 %cond +} + +; overflow is impossible in x < (UINT_MAX / 2 + 1) ? x * 2 : x +; CHECK-LABEL: @foo5( +; CHECK-NOT: umul.with.overflow +define i32 @foo5(i32 %x) #0 { +entry: + %cmp = icmp sgt i32 %x, -1 + br i1 %cmp, label %cond.true, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %0 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 2) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ %x, %entry ] + ret i32 %cond +} + +; overflow is possible in x < (UINT_MAX / 2 + 2) ? x * 2 : x +; CHECK-LABEL: @foo6( +; CHECK: umul.with.overflow +define i32 @foo6(i32 %x) #0 { +entry: + %cmp = icmp ult i32 %x, -2147483647 + br i1 %cmp, label %cond.true, label %cond.end + +trap: + tail call void @llvm.trap() #2 + unreachable + +cond.true: + %0 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %x, i32 2) + %1 = extractvalue { i32, i1 } %0, 0 + %2 = extractvalue { i32, i1 } %0, 1 + br i1 %2, label %trap, label %cond.end + +cond.end: + %cond = phi i32 [ %1, %cond.true ], [ %x, %entry ] + ret i32 %cond +} + +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) #1 +declare void @llvm.trap() #2