Index: include/llvm/Analysis/CodeMetrics.h =================================================================== --- include/llvm/Analysis/CodeMetrics.h +++ include/llvm/Analysis/CodeMetrics.h @@ -16,10 +16,12 @@ #define LLVM_ANALYSIS_CODEMETRICS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/CallSite.h" namespace llvm { class BasicBlock; +class Loop; class Function; class Instruction; class DataLayout; @@ -85,7 +87,23 @@ NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {} /// \brief Add information about a block to the current state. - void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI); + void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, + SmallPtrSetImpl &EphValues); + + /// \brief Collect a block's ephemeral values (those used only by invariants + /// or similar intrinsics in the block). + static void collectEphemeralValues(const BasicBlock *BB, + SmallPtrSetImpl &EphValues); + + /// \brief Collect a loop's ephemeral values (those used only by invariants + /// or similar intrinsics in the loop). + static void collectEphemeralValues(const Loop *L, + SmallPtrSetImpl &EphValues); + + /// \brief Collect a function's ephemeral values (those used only by + /// invariants or similar intrinsics in the function). 
+ static void collectEphemeralValues(const Function *L, + SmallPtrSetImpl &EphValues); }; } Index: lib/Analysis/CodeMetrics.cpp =================================================================== --- lib/Analysis/CodeMetrics.cpp +++ lib/Analysis/CodeMetrics.cpp @@ -12,22 +12,118 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "code-metrics" using namespace llvm; +static void collectEphemeralValueRoots(const BasicBlock *BB, + SmallVector &WorkSet){ + for (BasicBlock::const_iterator J = BB->getFirstInsertionPt(), JE = BB->end(); + J != JE; ++J) + if (const CallInst *CI = dyn_cast(J)) + if (Function *F = CI->getCalledFunction()) + switch (F->getIntrinsicID()) { + default: break; + // FIXME: This list is repeated from NoTTI::getIntrinsicCost. + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't actually represent code after lowering. + WorkSet.push_back(CI); + break; + } +} + +static void completeEphemeralValues(SmallVector &WorkSet, + SmallPtrSetImpl &EphValues) { + SmallPtrSet Visited; + + while (!WorkSet.empty()) { + const Value *V = WorkSet.pop_back_val(); + if (!Visited.insert(V)) + continue; + + // If all uses of this value are ephemeral, then so is this value. 
+ bool FoundNEUse = false; + for (const User *I : V->users()) + if (!EphValues.count(I)) { + FoundNEUse = true; + break; + } + + if (FoundNEUse) + continue; + + EphValues.insert(V); + DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n"); + + if (const User *U = dyn_cast(V)) + for (const Value *J : U->operands()) { + if (isSafeToSpeculativelyExecute(J)) + WorkSet.push_back(J); + } + } +} + +// Find all ephemeral values. +void CodeMetrics::collectEphemeralValues(const BasicBlock *BB, + SmallPtrSetImpl &EphValues) { + SmallVector WorkSet; + + collectEphemeralValueRoots(BB, WorkSet); + completeEphemeralValues(WorkSet, EphValues); +} + +void CodeMetrics::collectEphemeralValues(const Loop *L, + SmallPtrSetImpl &EphValues) { + SmallVector WorkSet; + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + collectEphemeralValueRoots(*I, WorkSet); + + completeEphemeralValues(WorkSet, EphValues); +} + +void CodeMetrics::collectEphemeralValues(const Function *F, + SmallPtrSetImpl &EphValues) { + SmallVector WorkSet; + + for (Function::const_iterator I : *F) + collectEphemeralValueRoots(I, WorkSet); + + completeEphemeralValues(WorkSet, EphValues); +} + /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + SmallPtrSetImpl &EphValues) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { + // Skip ephemeral values. + if (EphValues.count(II)) + continue; + // Special handling for calls. 
if (isa(II) || isa(II)) { ImmutableCallSite CS(cast(II)); Index: lib/Analysis/IPA/InlineCost.cpp =================================================================== --- lib/Analysis/IPA/InlineCost.cpp +++ lib/Analysis/IPA/InlineCost.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" @@ -104,7 +105,7 @@ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); // Custom analysis routines. - bool analyzeBlock(BasicBlock *BB); + bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl &EphValues); // Disable several entry points to the visitor so we don't accidentally use // them by declaring but not defining them here. @@ -881,7 +882,8 @@ /// aborts early if the threshold has been exceeded or an impossible to inline /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. -bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { +bool CallAnalyzer::analyzeBlock(BasicBlock *BB, + SmallPtrSetImpl &EphValues) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { // FIXME: Currently, the number of instructions in a function regardless of // our ability to simplify them during inline to constants or dead code, @@ -893,6 +895,10 @@ if (isa(I)) continue; + // Skip ephemeral values. + if (EphValues.count(I)) + continue; + ++NumInstructions; if (isa(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -1096,6 +1102,9 @@ NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); NumAllocaArgs = SROAArgValues.size(); + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(&F, EphValues); + // The worklist of live basic blocks in the callee *after* inlining. 
We avoid // adding basic blocks of the callee which can be proven to be dead for this // particular call site in order to get more accurate cost estimates. This @@ -1129,7 +1138,7 @@ // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. - if (!analyzeBlock(BB)) { + if (!analyzeBlock(BB, EphValues)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || HasIndirectBr) return false; Index: lib/Transforms/Scalar/LoopRotation.cpp =================================================================== --- lib/Transforms/Scalar/LoopRotation.cpp +++ lib/Transforms/Scalar/LoopRotation.cpp @@ -323,8 +323,11 @@ // Check size of original header and reject loop if it is very big or we can't // duplicate blocks inside it. { + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI); + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); if (Metrics.notDuplicatable) { DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" << " instructions: "; L->dump()); Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -202,10 +202,13 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, const TargetTransformInfo &TTI) { + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, EphValues); + CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I, TTI); + Metrics.analyzeBasicBlock(*I, TTI, EphValues); NumCalls = Metrics.NumInlineCandidates; NotDuplicatable = Metrics.notDuplicatable; Index: lib/Transforms/Scalar/LoopUnswitch.cpp =================================================================== --- 
lib/Transforms/Scalar/LoopUnswitch.cpp +++ lib/Transforms/Scalar/LoopUnswitch.cpp @@ -229,13 +229,16 @@ // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, EphValues); + // FIXME: This is overly conservative because it does not take into // consideration code simplification opportunities and code that can // be shared by the resultant unswitched loops. CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I, TTI); + Metrics.analyzeBasicBlock(*I, TTI, EphValues); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); Index: test/Transforms/Inline/ephemeral.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/ephemeral.ll @@ -0,0 +1,29 @@ +; RUN: opt -S -Oz %s | FileCheck %s + +@a = global i32 4 + +define i1 @inner() { + %a1 = load volatile i32* @a + %x1 = add i32 %a1, %a1 + %c = icmp eq i32 %x1, 0 + + %a2 = mul i32 %a1, %a1 + %a3 = sub i32 %a1, 5 + %a4 = udiv i32 %a3, -13 + %a5 = mul i32 %a4, %a4 + %a6 = add i32 %a5, %x1 + %ca = icmp sgt i32 %a6, -7 + tail call void @llvm.invariant(i1 %ca) + + ret i1 %c +} + +; @inner() should be inlined for -Oz. 
+; CHECK-NOT: call i1 @inner +define i1 @outer() optsize { + %r = call i1 @inner() + ret i1 %r +} + +declare void @llvm.invariant(i1) nounwind + Index: test/Transforms/LoopUnroll/ephemeral.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/ephemeral.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s + +; CHECK-LABEL: @test1 +; CHECK: for.body: +; CHECK-NOT: for.end: + +define i32 @test1(i32* nocapture %a) nounwind uwtable readonly { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + + %1 = add nsw i32 %0, 2 + %2 = add nsw i32 %1, 4 + %3 = add nsw i32 %2, 4 + %4 = add nsw i32 %3, 4 + %5 = add nsw i32 %4, 4 + %6 = add nsw i32 %5, 4 + %7 = add nsw i32 %6, 4 + %8 = add nsw i32 %7, 4 + %9 = add nsw i32 %8, 4 + %10 = add nsw i32 %9, 4 + %ca = icmp sgt i32 %10, -7 + call void @llvm.invariant(i1 %ca) + + %add = add nsw i32 %0, %sum.01 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 5 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret i32 %add +} + +declare void @llvm.invariant(i1) nounwind +