Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -162,6 +162,13 @@ /// Keep track of values which map to a pointer base and constant offset. DenseMap> ConstantOffsetPtrs; + /// Model the elimination of repeated loads that is expected to happen + /// whenever we simplify away the stores that would otherwise cause them to be + /// loads. + bool EnableLoadElimination; + SmallPtrSet LoadAddrSet; + int LoadEliminationCost; + // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); bool lookupSROAArgAndCost(Value *V, Value *&Arg, @@ -170,6 +177,7 @@ void disableSROA(Value *V); void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); + void disableLoadElimination(); bool isGEPFree(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); @@ -261,10 +269,10 @@ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), HasFrameEscape(false), AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), VectorBonus(0), SingleBBBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), - SROACostSavingsLost(0) {} + EnableLoadElimination(true), LoadEliminationCost(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -319,6 +327,7 @@ SROACostSavings -= CostIt->second; SROACostSavingsLost += CostIt->second; SROAArgCosts.erase(CostIt); + disableLoadElimination(); } /// \brief If 'V' maps to a SROA candidate, disable SROA for it. @@ -336,6 +345,13 @@ SROACostSavings += InstructionCost; } +void CallAnalyzer::disableLoadElimination() { + if (EnableLoadElimination) { + Cost += LoadEliminationCost; + EnableLoadElimination = false; + } +} + /// \brief Accumulate a constant GEP offset into an APInt if possible. /// /// Returns false if unable to compute the offset for any reason. Respects any @@ -989,6 +1005,16 @@ disableSROA(CostIt); } + // If the data are already loaded from this address and there is no reachable + // stores/calls or all reachable stores can be SROA'd in the function, this + // load is likely to be redundant and can be eliminated. + if (EnableLoadElimination) { + if (!LoadAddrSet.insert(I.getPointerOperand()).second) { + LoadEliminationCost += InlineConstants::InstrCost; + return true; + } + } + return false; } @@ -1004,6 +1030,15 @@ disableSROA(CostIt); } + // The store can potentially clobber the repeated loads and prevent them from + // elimination. + // FIXME: + // 1. We can probably keep an initial set of eliminatable loads substracted + // from the cost even when we finally see a store. We just need to disable + // *further* accumulation of elimination savings. + // 2. We should probably at some point thread MemorySSA for the callee into + // this and then use that to actually compute *really* precise savings. + disableLoadElimination(); return false; } @@ -1067,6 +1102,9 @@ } bool CallAnalyzer::visitCallSite(CallSite CS) { + Function *Fun = CS.getCalledFunction(); + if (!Fun || !canConstantFoldCallTo(CS, Fun)) + disableLoadElimination(); if (CS.hasFnAttr(Attribute::ReturnsTwice) && !F.hasFnAttribute(Attribute::ReturnsTwice)) { // This aborts the entire analysis. @@ -1628,6 +1666,7 @@ DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(LoadEliminationCost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); Index: test/Transforms/Inline/redundant-loads.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/redundant-loads.ll @@ -0,0 +1,70 @@ +; REQUIRES: asserts +; RUN: opt -inline < %s -S -debug-only=inline-cost 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK Analyzing call of inner1... (caller:outer1) +; CHECK LoadEliminationCost: 5 +define void @outer1(i32* %a) { + %b = alloca i32 + call void @inner1(i32* %a, i32* %b) + ret void +} + +define void @inner1(i32* %a, i32* %b) { + %1 = load i32, i32* %a + store i32 %1, i32 * %b + %2 = load i32, i32* %a + ret void +} + +; CHECK: Analyzing call of inner2... (caller:outer2) +; CHECK: LoadEliminationCost: 0 +define void @outer2(i32* %a, i32* %b) { + call void @inner2(i32* %a, i32* %b) + ret void +} + +define void @inner2(i32* %a, i32* %b) { + %1 = load i32, i32* %a + store i32 %1, i32 * %b + %2 = load i32, i32* %a + ret void +} + +; CHECK Analyzing call of inner3... (caller:outer3) +; CHECK LoadEliminationCost: 0 +define void @outer3(i32* %a) { + call void @inner3(i32* %a) + ret void +} + +declare void @foo() + +define void @inner3(i32* %a) { + %1 = load i32, i32* %a + call void @foo() + %2 = load i32, i32* %a + ret void +} + +; CHECK Analyzing call of inner4... (caller:outer4) +; CHECK LoadEliminationCost: 5 +define void @outer4(i32* %a, i32* %b, i32* %c) { + call void @inner4(i32* %a, i32* %b, i1 false) + ret void +} + +define void @inner4(i32* %a, i32* %b, i1 %pred) { + %1 = load i32, i32* %a + br i1 %pred, label %cond_true, label %cond_false + +cond_true: + store i32 %1, i32 * %b + br label %cond_false + +cond_false: + %2 = load i32, i32* %a + ret void +}