Index: llvm/trunk/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ValueTracking.h +++ llvm/trunk/include/llvm/Analysis/ValueTracking.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -325,6 +326,11 @@ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); + /// Returns true if the arithmetic part of the \p II 's result is + /// used only along the paths control dependent on the computation + /// not overflowing, \p II being an .with.overflow intrinsic. + bool isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT); + /// Return true if this function can prove that the instruction I will /// always transfer execution to one of its successors (including the next /// instruction that follows within a basic block). E.g. this is not Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -3831,7 +3831,7 @@ /// Try to map \p V into a BinaryOp, and return \c None on failure. -static Optional MatchBinaryOp(Value *V) { +static Optional MatchBinaryOp(Value *V, DominatorTree &DT) { auto *Op = dyn_cast(V); if (!Op) return None; @@ -3877,6 +3877,50 @@ } return BinaryOp(Op); + case Instruction::ExtractValue: { + auto *EVI = cast(Op); + if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0) + break; + + auto *CI = dyn_cast(EVI->getAggregateOperand()); + if (!CI) + break; + + if (auto *F = CI->getCalledFunction()) + switch (F->getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: { + if (!isOverflowIntrinsicNoWrap(cast(CI), DT)) + return BinaryOp(Instruction::Add, CI->getArgOperand(0), + CI->getArgOperand(1)); + + // Now that we know that all uses of the arithmetic-result component of + // CI are guarded by the overflow check, we can go ahead and pretend + // that the arithmetic is non-overflowing. + if (F->getIntrinsicID() == Intrinsic::sadd_with_overflow) + return BinaryOp(Instruction::Add, CI->getArgOperand(0), + CI->getArgOperand(1), /* IsNSW = */ true, + /* IsNUW = */ false); + else + return BinaryOp(Instruction::Add, CI->getArgOperand(0), + CI->getArgOperand(1), /* IsNSW = */ false, + /* IsNUW*/ true); + } + + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + return BinaryOp(Instruction::Sub, CI->getArgOperand(0), + CI->getArgOperand(1)); + + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + return BinaryOp(Instruction::Mul, CI->getArgOperand(0), + CI->getArgOperand(1)); + default: + break; + } + } + default: break; } @@ -3953,7 +3997,7 @@ // If the increment doesn't overflow, then neither the addrec nor // the post-increment will overflow. - if (auto BO = MatchBinaryOp(BEValueV)) { + if (auto BO = MatchBinaryOp(BEValueV, DT)) { if (BO->Opcode == Instruction::Add && BO->LHS == PN) { if (BO->IsNUW) Flags = setFlags(Flags, SCEV::FlagNUW); @@ -4833,7 +4877,7 @@ return getUnknown(V); Operator *U = cast(V); - if (auto BO = MatchBinaryOp(U)) { + if (auto BO = MatchBinaryOp(U, DT)) { switch (BO->Opcode) { case Instruction::Add: { // The simple thing to do would be to just call getSCEV on both operands @@ -4874,7 +4918,7 @@ else AddOps.push_back(getSCEV(BO->RHS)); - auto NewBO = MatchBinaryOp(BO->LHS); + auto NewBO = MatchBinaryOp(BO->LHS, DT); if (!NewBO || (NewBO->Opcode != Instruction::Add && NewBO->Opcode != Instruction::Sub)) { AddOps.push_back(getSCEV(BO->LHS)); @@ -4904,7 +4948,7 @@ } MulOps.push_back(getSCEV(BO->RHS)); - auto NewBO = MatchBinaryOp(BO->LHS); + auto NewBO = MatchBinaryOp(BO->LHS, DT); if (!NewBO || NewBO->Opcode != Instruction::Mul) { MulOps.push_back(getSCEV(BO->LHS)); break; Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -3253,6 +3253,67 @@ return OverflowResult::MayOverflow; } +bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) { +#ifndef NDEBUG + auto IID = II->getIntrinsicID(); + assert((IID == Intrinsic::sadd_with_overflow || + IID == Intrinsic::uadd_with_overflow || + IID == Intrinsic::ssub_with_overflow || + IID == Intrinsic::usub_with_overflow || + IID == Intrinsic::smul_with_overflow || + IID == Intrinsic::umul_with_overflow) && + "Not an overflow intrinsic!"); +#endif + + SmallVector GuardingBranches; + SmallVector Results; + + for (User *U : II->users()) { + if (auto *EVI = dyn_cast(U)) { + assert(EVI->getNumIndices() == 1 && "Obvious from CI's type"); + + if (EVI->getIndices()[0] == 0) + Results.push_back(EVI); + else { + assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type"); + + for (auto *U : EVI->users()) + if (auto *B = dyn_cast(U)) { + assert(B->isConditional() && "How else is it using an i1?"); + GuardingBranches.push_back(B); + } + } + } else { + // We are using the aggregate directly in a way we don't want to analyze + // here (storing it to a global, say). + return false; + } + } + + auto AllUsesGuardedByBranch = [&](BranchInst *BI) { + BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1)); + if (!NoWrapEdge.isSingleEdge()) + return false; + + // Check if all users of the add are provably no-wrap. + for (auto *Result : Results) { + // If the extractvalue itself is not executed on overflow, the we don't + // need to check each use separately, since domination is transitive. + if (DT.dominates(NoWrapEdge, Result->getParent())) + continue; + + for (auto &RU : Result->uses()) + if (!DT.dominates(NoWrapEdge, RU)) + return false; + } + + return true; + }; + + return any_of(GuardingBranches, AllUsesGuardedByBranch); +} + + OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, Index: llvm/trunk/test/Analysis/ScalarEvolution/overflow-intrinsics.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/overflow-intrinsics.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/overflow-intrinsics.ll @@ -0,0 +1,309 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f_sadd_0(i8* %a) { +; CHECK-LABEL: Classifying expressions for: @f_sadd_0 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,16) S: [0,16) + + %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap() #2, !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + %cmp = icmp slt i32 %tmp2, 16 + br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: Loop %for.body: max backedge-taken count is 15 +} + +define void @f_sadd_1(i8* %a) { +; CHECK-LABEL: Classifying expressions for: @f_sadd_1 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,16) S: [0,16) + +; SCEV can prove for the above induction variable; but it does +; not bother so before it sees the sext below since it is not a 100% +; obvious. + + %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + + br label %cont + +cont: ; preds = %for.body + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + %cmp = icmp slt i32 %tmp2, 16 + br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: Loop %for.body: max backedge-taken count is 15 +} + +define void @f_sadd_2(i8* %a, i1* %c) { +; CHECK-LABEL: Classifying expressions for: @f_sadd_2 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] +; CHECK-NEXT: --> {0,+,1}<%for.body> + + %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + + br label %cont + +cont: ; preds = %for.body + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + %cond = load volatile i1, i1* %c + br i1 %cond, label %for.body, label %for.cond.cleanup +} + +define void @f_sadd_3(i8* %a, i1* %c) { +; CHECK-LABEL: Classifying expressions for: @f_sadd_3 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %for.body ] +; CHECK-NEXT: --> {0,+,1}<%for.body> + + %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %for.body ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + br i1 %tmp1, label %trap, label %for.body, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap() #2, !nosanitize !{} + unreachable, !nosanitize !{} +} + +define void @f_sadd_4(i8* %a, i1* %c) { +; CHECK-LABEL: Classifying expressions for: @f_sadd_4 +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %merge ] +; CHECK-NEXT: --> {0,+,1}<%for.body> + + %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %merge ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + br i1 %tmp1, label %notrap, label %merge + +notrap: + br label %merge + +merge: + %tmp3 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp3, label %trap, label %for.body, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap() #2, !nosanitize !{} + unreachable, !nosanitize !{} +} + +define void @f_sadd_may_overflow(i8* %a, i1* %c) { +; CHECK-LABEL: Classifying expressions for: @f_sadd_may_overflow +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp1, %cont ] +; CHECK-NEXT: --> {0,+,1}<%for.body> U: full-set S: full-set + + %i.04 = phi i32 [ 0, %entry ], [ %tmp1, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1) + %cond1 = load volatile i1, i1* %c + br i1 %cond1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap() #2, !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %tmp1 = extractvalue { i32, i1 } %tmp0, 0 + %cond = load volatile i1, i1* %c + br i1 %cond, label %for.body, label %for.cond.cleanup +} + +define void @f_uadd(i8* %a) { +; CHECK-LABEL: Classifying expressions for: @f_uadd +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] +; CHECK-NEXT: --> {0,+,1}<%for.body> U: [0,16) S: [0,16) + + %i.04 = phi i32 [ 0, %entry ], [ %tmp2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + %cmp = icmp slt i32 %tmp2, 16 + br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: Loop %for.body: max backedge-taken count is 15 +} + +define void @f_ssub(i8* nocapture %a) { +; CHECK-LABEL: Classifying expressions for: @f_ssub +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ] +; CHECK-NEXT: --> {15,+,-1}<%for.body> U: [0,16) S: [0,16) + + %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + %cmp = icmp sgt i32 %tmp2, -1 + br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: Loop %for.body: max backedge-taken count is 15 +} + +define void @f_usub(i8* nocapture %a) { +; CHECK-LABEL: Classifying expressions for: @f_usub +entry: + br label %for.body + +for.cond.cleanup: ; preds = %cont + ret void + +for.body: ; preds = %entry, %cont +; CHECK: %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ] +; CHECK-NEXT: --> {15,+,-1}<%for.body> U: [0,16) S: [0,16) + + %i.04 = phi i32 [ 15, %entry ], [ %tmp2, %cont ] + %idxprom = sext i32 %i.04 to i64 + %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %tmp0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1) + %tmp1 = extractvalue { i32, i1 } %tmp0, 1 + br i1 %tmp1, label %trap, label %cont, !nosanitize !{} + +trap: ; preds = %for.body + tail call void @llvm.trap(), !nosanitize !{} + unreachable, !nosanitize !{} + +cont: ; preds = %for.body + %tmp2 = extractvalue { i32, i1 } %tmp0, 0 + %cmp = icmp sgt i32 %tmp2, -1 + br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: Loop %for.body: max backedge-taken count is 15 +} + +define i32 @f_smul(i32 %val_a, i32 %val_b) { +; CHECK-LABEL: Classifying expressions for: @f_smul + %agg = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %val_a, i32 %val_b) +; CHECK: %mul = extractvalue { i32, i1 } %agg, 0 +; CHECK-NEXT: --> (%val_a * %val_b) U: full-set S: full-set + %mul = extractvalue { i32, i1 } %agg, 0 + ret i32 %mul +} + +define i32 @f_umul(i32 %val_a, i32 %val_b) { +; CHECK-LABEL: Classifying expressions for: @f_umul + %agg = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %val_a, i32 %val_b) +; CHECK: %mul = extractvalue { i32, i1 } %agg, 0 +; CHECK-NEXT: --> (%val_a * %val_b) U: full-set S: full-set + %mul = extractvalue { i32, i1 } %agg, 0 + ret i32 %mul +} + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone +declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +declare void @llvm.trap() #2