Index: llvm/trunk/lib/Analysis/InlineCost.cpp =================================================================== --- llvm/trunk/lib/Analysis/InlineCost.cpp +++ llvm/trunk/lib/Analysis/InlineCost.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -163,12 +164,20 @@ /// Keep track of values which map to a pointer base and constant offset. DenseMap> ConstantOffsetPtrs; + /// Keep track of dead blocks due to the constant arguments. + SetVector DeadBlocks; + + /// The mapping of the blocks to their known unique successors due to the + /// constant arguments. + DenseMap KnownSuccessors; + // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); bool lookupSROAArgAndCost(Value *V, Value *&Arg, DenseMap::iterator &CostIt); void disableSROA(DenseMap::iterator CostIt); void disableSROA(Value *V); + void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); bool isGEPFree(GetElementPtrInst &GEP); @@ -420,15 +429,94 @@ } bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - // FIXME: We need to propagate SROA *disabling* through phi nodes, even // though we don't want to propagate it's bonuses. The idea is to disable // SROA if it *might* be used in an inappropriate manner. // Phi nodes are always zero-cost. + + APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits()); + bool CheckSROA = I.getType()->isPointerTy(); + + // Track the constant or pointer with constant offset we've seen so far. + Constant *FirstC = nullptr; + std::pair FirstBaseAndOffset = {nullptr, ZeroOffset}; + Value *FirstV = nullptr; + + for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = I.getIncomingBlock(i); + // If the incoming block is dead, skip the incoming block. + if (DeadBlocks.count(Pred)) + continue; + // If the parent block of phi is not the known successor of the incoming + // block, skip the incoming block. + BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; + if (KnownSuccessor && KnownSuccessor != I.getParent()) + continue; + + Value *V = I.getIncomingValue(i); + // If the incoming value is this phi itself, skip the incoming value. + if (&I == V) + continue; + + Constant *C = dyn_cast(V); + if (!C) + C = SimplifiedValues.lookup(V); + + std::pair BaseAndOffset = {nullptr, ZeroOffset}; + if (!C && CheckSROA) + BaseAndOffset = ConstantOffsetPtrs.lookup(V); + + if (!C && !BaseAndOffset.first) + // The incoming value is neither a constant nor a pointer with constant + // offset, exit early. + return true; + + if (FirstC) { + if (FirstC == C) + // If we've seen a constant incoming value before and it is the same + // constant we see this time, continue checking the next incoming value. + continue; + // Otherwise early exit because we either see a different constant or saw + // a constant before but we have a pointer with constant offset this time. + return true; + } + + if (FirstV) { + // The same logic as above, but check pointer with constant offset here. + if (FirstBaseAndOffset == BaseAndOffset) + continue; + return true; + } + + if (C) { + // This is the 1st time we've seen a constant, record it. + FirstC = C; + continue; + } + + // The remaining case is that this is the 1st time we've seen a pointer with + // constant offset, record it. + FirstV = V; + FirstBaseAndOffset = BaseAndOffset; + } + + // Check if we can map phi to a constant. + if (FirstC) { + SimplifiedValues[&I] = FirstC; + return true; + } + + // Check if we can map phi to a pointer with constant offset. + if (FirstBaseAndOffset.first) { + ConstantOffsetPtrs[&I] = FirstBaseAndOffset; + + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + } + return true; } @@ -1512,6 +1600,44 @@ return cast(ConstantInt::get(IntPtrTy, Offset)); } +/// \brief Find dead blocks due to deleted CFG edges during inlining. +/// +/// If we know the successor of the current block, \p CurrBB, has to be \p +/// NextBB, the other successors of \p CurrBB are dead if these successors have +/// no live incoming CFG edges. If one block is found to be dead, we can +/// continue growing the dead block list by checking the successors of the dead +/// blocks to see if all their incoming edges are dead or not. +void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { + auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { + // A CFG edge is dead if the predecessor is dead or the predessor has a + // known successor which is not the one under exam. + return (DeadBlocks.count(Pred) || + (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); + }; + + auto IsNewlyDead = [&](BasicBlock *BB) { + // If all the edges to a block are dead, the block is also dead. + return (!DeadBlocks.count(BB) && + llvm::all_of(predecessors(BB), + [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); + }; + + for (BasicBlock *Succ : successors(CurrBB)) { + if (Succ == NextBB || !IsNewlyDead(Succ)) + continue; + SmallVector NewDead; + NewDead.push_back(Succ); + while (!NewDead.empty()) { + BasicBlock *Dead = NewDead.pop_back_val(); + if (DeadBlocks.insert(Dead)) + // Continue growing the dead block lists. + for (BasicBlock *S : successors(Dead)) + if (IsNewlyDead(S)) + NewDead.push_back(S); + } + } +} + /// \brief Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not @@ -1649,7 +1775,10 @@ Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); + BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } @@ -1657,7 +1786,10 @@ Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor()); + BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } Index: llvm/trunk/test/Transforms/Inline/AArch64/phi.ll =================================================================== --- llvm/trunk/test/Transforms/Inline/AArch64/phi.ll +++ llvm/trunk/test/Transforms/Inline/AArch64/phi.ll @@ -0,0 +1,504 @@ +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +declare void @pad() +@glbl = external global i32 + +define i1 @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call i1 @inner1 + %C = call i1 @inner1() + ret i1 %C +} + +define i1 @inner1() { +entry: + br label %if_true + +if_true: + %phi = phi i1 [0, %entry], [%phi, %if_true] ; Simplified to 0 + br i1 %phi, label %if_true, label %exit + +exit: + store i32 0, i32* @glbl + store i32 1, i32* @glbl + store i32 2, i32* @glbl + store i32 3, i32* @glbl + store i32 4, i32* @glbl + ret i1 %phi +} + + +define i1 @outer2(i1 %val) { +; CHECK-LABEL: @outer2( +; CHECK: call i1 @inner2 + %C = call i1 @inner2(i1 %val) + ret i1 %C +} + +define i1 @inner2(i1 %val) { +entry: + br label %if_true + +if_true: + %phi = phi i1 [%val, %entry], [%phi, %if_true] ; Cannot be simplified to a constant + br i1 %phi, label %if_true, label %exit + +exit: + call void @pad() + ret i1 %phi +} + + +define i1 @outer3(i1 %cond) { +; CHECK-LABEL: @outer3( +; CHECK-NOT: call i1 @inner3 + %C = call i1 @inner3(i1 %cond) + ret i1 %C +} + +define i1 @inner3(i1 %cond) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [0, %entry], [0, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + store i32 0, i32* @glbl + store i32 1, i32* @glbl + store i32 2, i32* @glbl + store i32 3, i32* @glbl + store i32 4, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer4(i1 %cond) { +; CHECK-LABEL: @outer4( +; CHECK-NOT: call i1 @inner4 + %C = call i1 @inner4(i1 %cond, i32 0) + ret i1 %C +} + +define i1 @inner4(i1 %cond, i32 %val) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [0, %entry], [%val, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp +} + + +define i1 @outer5_1(i1 %cond) { +; CHECK-LABEL: @outer5_1( +; CHECK-NOT: call i1 @inner5 + %C = call i1 @inner5(i1 %cond, i32 0, i32 0) + ret i1 %C +} + + +define i1 @outer5_2(i1 %cond) { +; CHECK-LABEL: @outer5_2( +; CHECK: call i1 @inner5 + %C = call i1 @inner5(i1 %cond, i32 0, i32 1) + ret i1 %C +} + +define i1 @inner5(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Can be simplified to a constant if %val1 and %val2 are the same constants + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer6(i1 %cond, i32 %val) { +; CHECK-LABEL: @outer6( +; CHECK-NOT: call i1 @inner6 + %C = call i1 @inner6(i1 true, i32 %val, i32 0) + ret i1 %C +} + +define i1 @inner6(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer7(i1 %cond, i32 %val) { +; CHECK-LABEL: @outer7( +; CHECK-NOT: call i1 @inner7 + %C = call i1 @inner7(i1 false, i32 0, i32 %val) + ret i1 %C +} + +define i1 @inner7(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer8_1() { +; CHECK-LABEL: @outer8_1( +; CHECK-NOT: call i1 @inner8 + %C = call i1 @inner8(i32 0) + ret i1 %C +} + + + +define i1 @outer8_2() { +; CHECK-LABEL: @outer8_2( +; CHECK-NOT: call i1 @inner8 + %C = call i1 @inner8(i32 3) + ret i1 %C +} + +define i1 @inner8(i32 %cond) { +entry: + switch i32 %cond, label %default [ i32 0, label %zero + i32 1, label %one + i32 2, label %two ] + +zero: + br label %exit + +one: + br label %exit + +two: + br label %exit + +default: + br label %exit + +exit: + %phi = phi i32 [0, %zero], [1, %one], [2, %two], [-1, %default] ; Can be simplified to a constant if the switch condition is known + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp +} + + +define i1 @outer9(i1 %cond) { +; CHECK-LABEL: @outer9( +; CHECK-NOT: call i1 @inner9 + %C = call i1 @inner9(i32 0, i1 %cond) + ret i1 %C +} + +define i1 @inner9(i32 %cond1, i1 %cond2) { +entry: + switch i32 %cond1, label %exit [ i32 0, label %zero + i32 1, label %one + i32 2, label %two ] + +zero: + br label %exit + +one: + br label %exit + +two: + br i1 %cond2, label %two_true, label %two_false + +two_true: + br label %exit + +two_false: + br label %exit + +exit: + %phi = phi i32 [0, %zero], [1, %one], [2, %two_true], [2, %two_false], [-1, %entry] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp +} + + +define i32 @outer10(i1 %cond) { +; CHECK-LABEL: @outer10( +; CHECK-NOT: call i32 @inner10 + %A = alloca i32 + %C = call i32 @inner10(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner10(i1 %cond, i32* %A) { +entry: + br label %if_true + +if_true: + %phi = phi i32* [%A, %entry], [%phi, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + br i1 %cond, label %if_true, label %exit + +exit: + call void @pad() + ret i32 %load +} + + +define i32 @outer11(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer11( +; CHECK: call i32 @inner11 + %C = call i32 @inner11(i1 %cond, i32* %ptr) + ret i32 %C +} + +define i32 @inner11(i1 %cond, i32* %ptr) { +entry: + br label %if_true + +if_true: + %phi = phi i32* [%ptr, %entry], [%phi, %if_true] ; Cannot be simplified + %load = load i32, i32* %phi + br i1 %cond, label %if_true, label %exit + +exit: + call void @pad() + ret i32 %load +} + + +define i32 @outer12(i1 %cond) { +; CHECK-LABEL: @outer12( +; CHECK-NOT: call i32 @inner12 + %A = alloca i32 + %C = call i32 @inner12(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner12(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr, %entry], [%ptr, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + ret i32 %load +} + + +define i32 @outer13(i1 %cond) { +; CHECK-LABEL: @outer13( +; CHECK-NOT: call i32 @inner13 + %A = alloca i32 + %C = call i32 @inner13(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner13(i1 %cond, i32* %ptr) { +entry: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 2 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 1 + br i1 %cond, label %if_true, label %exit + +if_true: + %gep3 = getelementptr inbounds i32, i32* %gep2, i32 1 + br label %exit + +exit: + %phi = phi i32* [%gep1, %entry], [%gep3, %if_true] ; Simplifeid to %gep1 + %load = load i32, i32* %phi + call void @pad() + ret i32 %load +} + + +define i32 @outer14(i1 %cond) { +; CHECK-LABEL: @outer14( +; CHECK: call i32 @inner14 + %A1 = alloca i32 + %A2 = alloca i32 + %C = call i32 @inner14(i1 %cond, i32* %A1, i32* %A2) + ret i32 %C +} + +define i32 @inner14(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Cannot be simplified + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + ret i32 %load +} + + +define i32 @outer15(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer15( +; CHECK-NOT: call i32 @inner15 + %A = alloca i32 + %C = call i32 @inner15(i1 true, i32* %ptr, i32* %A) + ret i32 %C +} + +define i32 @inner15(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %load +} + + +define i32 @outer16(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer16( +; CHECK-NOT: call i32 @inner16 + %A = alloca i32 + %C = call i32 @inner16(i1 false, i32* %A, i32* %ptr) + ret i32 %C +} + +define i32 @inner16(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %load +} + + +define i1 @outer17(i1 %cond) { +; CHECK-LABEL: @outer17( +; CHECK: call i1 @inner17 + %A = alloca i32 + %C = call i1 @inner17(i1 %cond, i32* %A) + ret i1 %C +} + +define i1 @inner17(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [null, %entry], [%ptr, %if_true] ; Cannot be mapped to a constant + %cmp = icmp eq i32* %phi, null + call void @pad() + ret i1 %cmp +} + + +define i1 @outer18(i1 %cond) { +; CHECK-LABEL: @outer18( +; CHECK-NOT: call i1 @inner18 + %C = call i1 @inner18(i1 %cond, i1 true) + ret i1 %C +} + +define i1 @inner18(i1 %cond1, i1 %cond2) { +entry: + br i1 %cond1, label %block1, label %block2 + +block1: + br i1 %cond2, label %block3, label %block4 + +block2: + br i1 %cond2, label %block5, label %block4 + +block3: + %phi = phi i32 [0, %block1], [1, %block4], [0, %block5] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp + +block4: ; Unreachable block + br label %block3 + +block5: + br label %block3 +} + + +define i1 @outer19(i1 %cond) { +; CHECK-LABEL: @outer19( +; CHECK: call i1 @inner19 + %A = alloca i32 + %C = call i1 @inner19(i1 %cond, i32* %A) + ret i1 %C +} + +define i1 @inner19(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr, %entry], [null, %if_true] ; Cannot be mapped to a constant + %cmp = icmp eq i32* %phi, null + call void @pad() + ret i1 %cmp +}