Index: lib/Analysis/InlineCost.cpp =================================================================== --- lib/Analysis/InlineCost.cpp +++ lib/Analysis/InlineCost.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -163,12 +164,20 @@ /// Keep track of values which map to a pointer base and constant offset. DenseMap> ConstantOffsetPtrs; + /// Kepp track of dead blocks due to the constant arguments. + SetVector DeadBlocks; + + /// The mapping of the blocks to their known successors due to the constant + /// arguments. + DenseMap KnownSuccessors; + // Custom simplification helper routines. bool isAllocaDerivedArg(Value *V); bool lookupSROAArgAndCost(Value *V, Value *&Arg, DenseMap::iterator &CostIt); void disableSROA(DenseMap::iterator CostIt); void disableSROA(Value *V); + void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB); void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); bool isGEPFree(GetElementPtrInst &GEP); @@ -419,15 +428,94 @@ } bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - // FIXME: We need to propagate SROA *disabling* through phi nodes, even // though we don't want to propagate it's bonuses. The idea is to disable // SROA if it *might* be used in an inappropriate manner. // Phi nodes are always zero-cost. + + APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits()); + bool CheckSROA = I.getType()->isPointerTy(); + + // Track the constant or pointer with constant offset we've seen so far. + Constant *FirstC = nullptr; + std::pair FirstBaseAndOffset = {nullptr, ZeroOffset}; + Value *FirstV = nullptr; + + for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = I.getIncomingBlock(i); + // If the incoming block is dead, skip the incoming block. + if (DeadBlocks.count(Pred)) + continue; + // If the parent block of phi is not the known successor of the incoming + // block, skip the incoming block. + BasicBlock *KnownSuccessor = KnownSuccessors[Pred]; + if (KnownSuccessor && KnownSuccessor != I.getParent()) + continue; + + Value *V = I.getIncomingValue(i); + // If the incoming value is this phi itself, skip the incoming value. + if (&I == V) + continue; + + Constant *C = dyn_cast(V); + if (!C) + C = SimplifiedValues.lookup(V); + + if (C) { + if (!FirstC) + // If this is the 1st time we've seen a constant, record it. + FirstC = C; + else if (C != FirstC) + // If the incoming consant value is different from what we saw before, + // exit early. + return true; + + continue; + } + + if (!CheckSROA) + // The incoming value is not a constant and has no SROA opportunity, exit + // early. + return true; + + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(V); + + if (BaseAndOffset.first) { + if (!FirstV) { + // If this is the 1st time we've seen a pointer with constant offset, + // record it. + FirstV = V; + FirstBaseAndOffset = BaseAndOffset; + } else if (BaseAndOffset != FirstBaseAndOffset) + // If the incoming value is different from what we saw before, exit + // early. + return true; + + continue; + } + + // Eearly exit since there is no opportunity to propogate constants or + // pointers with constant offsets + return true; + } + + // Check if we can map phi to a constant. + if (FirstC) { + SimplifiedValues[&I] = FirstC; + return true; + } + + // Check if we can map phi to a pointer with constant offset. + if (FirstBaseAndOffset.first) { + ConstantOffsetPtrs[&I] = FirstBaseAndOffset; + + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(FirstV, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + } + return true; } @@ -1514,6 +1602,40 @@ return cast(ConstantInt::get(IntPtrTy, Offset)); } +/// \brief Find dead blocks due to deleted CFG edges during inlining. +/// +/// If we know the successor of the current block, \p CurrBB, has to be \p +/// NextBB, the other successors of \p CurrBB are dead if these successors have +/// no other predecessors or the other predecessors are also dead. If one block +/// is found to be dead, we can continue growing the dead block list by checking +/// the successors of the dead blocks to see if all their predecessors are dead. +void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { + for (BasicBlock *Succ : successors(CurrBB)) { + if (Succ == NextBB || DeadBlocks.count(Succ) || + // Succ is live if any of its predecessors other than CurrBB is live. + llvm::any_of(predecessors(Succ), [&](BasicBlock *Pred) { + return (Pred != CurrBB && !DeadBlocks.count(Pred)); + })) + continue; + SmallVector NewDead; + NewDead.push_back(Succ); + while (!NewDead.empty()) { + BasicBlock *Dead = NewDead.pop_back_val(); + if (DeadBlocks.insert(Dead)) { + // Continue growing the dead block lists. If all the predessors of the + // dead block's successor are all dead, the successor is also dead. + for (BasicBlock *S : successors(Dead)) { + if (!DeadBlocks.count(S) && + llvm::all_of(predecessors(S), [&](BasicBlock *P) { + return (DeadBlocks.count(P)); + })) + NewDead.push_back(S); + } + } + } + } +} + /// \brief Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not @@ -1651,7 +1773,10 @@ Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); + BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } @@ -1659,7 +1784,10 @@ Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond)->getCaseSuccessor()); + BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); + BBWorklist.insert(NextBB); + KnownSuccessors[BB] = NextBB; + findDeadBlocks(BB, NextBB); continue; } } Index: test/Transforms/Inline/AArch64/phi.ll =================================================================== --- /dev/null +++ test/Transforms/Inline/AArch64/phi.ll @@ -0,0 +1,427 @@ +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +declare void @pad() +@glbl = external global i32 + +define i1 @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call i1 @inner1 + %C = call i1 @inner1() + ret i1 %C +} + +define i1 @inner1() { +entry: + br label %if_true + +if_true: + %phi = phi i1 [0, %entry], [%phi, %if_true] ; Simplified to 0 + br i1 %phi, label %if_true, label %exit + +exit: + store i32 0, i32* @glbl + store i32 1, i32* @glbl + store i32 2, i32* @glbl + store i32 3, i32* @glbl + store i32 4, i32* @glbl + ret i1 %phi +} + + +define i1 @outer2(i1 %val) { +; CHECK-LABEL: @outer2( +; CHECK: call i1 @inner2 + %C = call i1 @inner2(i1 %val) + ret i1 %C +} + +define i1 @inner2(i1 %val) { +entry: + br label %if_true + +if_true: + %phi = phi i1 [%val, %entry], [%phi, %if_true] ; Cannot be simplified to a constant + br i1 %phi, label %if_true, label %exit + +exit: + call void @pad() + ret i1 %phi +} + + +define i1 @outer3(i1 %cond) { +; CHECK-LABEL: @outer3( +; CHECK-NOT: call i1 @inner3 + %C = call i1 @inner3(i1 %cond) + ret i1 %C +} + +define i1 @inner3(i1 %cond) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [0, %entry], [0, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + store i32 0, i32* @glbl + store i32 1, i32* @glbl + store i32 2, i32* @glbl + store i32 3, i32* @glbl + store i32 4, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer4(i1 %cond) { +; CHECK-LABEL: @outer4( +; CHECK-NOT: call i1 @inner4 + %C = call i1 @inner4(i1 %cond, i32 0) + ret i1 %C +} + +define i1 @inner4(i1 %cond, i32 %val) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [0, %entry], [%val, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp +} + + +define i1 @outer5_1(i1 %cond) { +; CHECK-LABEL: @outer5_1( +; CHECK-NOT: call i1 @inner5 + %C = call i1 @inner5(i1 %cond, i32 0, i32 0) + ret i1 %C +} + + +define i1 @outer5_2(i1 %cond) { +; CHECK-LABEL: @outer5_2( +; CHECK: call i1 @inner5 + %C = call i1 @inner5(i1 %cond, i32 0, i32 1) + ret i1 %C +} + +define i1 @inner5(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Can be simplified to a constant if %val1 and %val2 are the same constants + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer6(i1 %cond, i32 %val) { +; CHECK-LABEL: @outer6( +; CHECK-NOT: call i1 @inner6 + %C = call i1 @inner6(i1 true, i32 %val, i32 0) + ret i1 %C +} + +define i1 @inner6(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer7(i1 %cond, i32 %val) { +; CHECK-LABEL: @outer7( +; CHECK-NOT: call i1 @inner7 + %C = call i1 @inner7(i1 false, i32 0, i32 %val) + ret i1 %C +} + +define i1 @inner7(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer8_1() { +; CHECK-LABEL: @outer8_1( +; CHECK-NOT: call i1 @inner8 + %C = call i1 @inner8(i32 0) + ret i1 %C +} + + + +define i1 @outer8_2() { +; CHECK-LABEL: @outer8_2( +; CHECK-NOT: call i1 @inner8 + %C = call i1 @inner8(i32 3) + ret i1 %C +} + +define i1 @inner8(i32 %cond) { +entry: + switch i32 %cond, label %default [ i32 0, label %zero + i32 1, label %one + i32 2, label %two ] + +zero: + br label %exit + +one: + br label %exit + +two: + br label %exit + +default: + br label %exit + +exit: + %phi = phi i32 [0, %zero], [1, %one], [2, %two], [-1, %default] ; Can be simplified to a constant if the switch condition is known + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp +} + + +define i1 @outer9(i1 %cond) { +; CHECK-LABEL: @outer9( +; CHECK-NOT: call i1 @inner9 + %C = call i1 @inner9(i32 0, i1 %cond) + ret i1 %C +} + +define i1 @inner9(i32 %cond1, i1 %cond2) { +entry: + switch i32 %cond1, label %exit [ i32 0, label %zero + i32 1, label %one + i32 2, label %two ] + +zero: + br label %exit + +one: + br label %exit + +two: + br i1 %cond2, label %two_true, label %two_false + +two_true: + br label %exit + +two_false: + br label %exit + +exit: + %phi = phi i32 [0, %zero], [1, %one], [2, %two_true], [2, %two_false], [-1, %entry] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp +} + + +define i32 @outer10(i1 %cond) { +; CHECK-LABEL: @outer10( +; CHECK-NOT: call i32 @inner10 + %A = alloca i32 + %C = call i32 @inner10(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner10(i1 %cond, i32* %A) { +entry: + br label %if_true + +if_true: + %phi = phi i32* [%A, %entry], [%phi, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + br i1 %cond, label %if_true, label %exit + +exit: + call void @pad() + ret i32 %load +} + + +define i32 @outer11(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer11( +; CHECK: call i32 @inner11 + %C = call i32 @inner11(i1 %cond, i32* %ptr) + ret i32 %C +} + +define i32 @inner11(i1 %cond, i32* %ptr) { +entry: + br label %if_true + +if_true: + %phi = phi i32* [%ptr, %entry], [%phi, %if_true] ; Cannot be simplified + %load = load i32, i32* %phi + br i1 %cond, label %if_true, label %exit + +exit: + call void @pad() + ret i32 %load +} + + +define i32 @outer12(i1 %cond) { +; CHECK-LABEL: @outer12( +; CHECK-NOT: call i32 @inner12 + %A = alloca i32 + %C = call i32 @inner12(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner12(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr, %entry], [%ptr, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + ret i32 %load +} + + +define i32 @outer13(i1 %cond) { +; CHECK-LABEL: @outer13( +; CHECK-NOT: call i32 @inner13 + %A = alloca i32 + %C = call i32 @inner13(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner13(i1 %cond, i32* %ptr) { +entry: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 2 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 1 + br i1 %cond, label %if_true, label %exit + +if_true: + %gep3 = getelementptr inbounds i32, i32* %gep2, i32 1 + br label %exit + +exit: + %phi = phi i32* [%gep1, %entry], [%gep3, %if_true] ; Simplifeid to %gep1 + %load = load i32, i32* %phi + call void @pad() + ret i32 %load +} + + +define i32 @outer14(i1 %cond) { +; CHECK-LABEL: @outer14( +; CHECK: call i32 @inner14 + %A1 = alloca i32 + %A2 = alloca i32 + %C = call i32 @inner14(i1 %cond, i32* %A1, i32* %A2) + ret i32 %C +} + +define i32 @inner14(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Cannot be simplified + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + ret i32 %load +} + + +define i32 @outer15(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer15( +; CHECK-NOT: call i32 @inner15 + %A = alloca i32 + %C = call i32 @inner15(i1 true, i32* %ptr, i32* %A) + ret i32 %C +} + +define i32 @inner15(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %load +} + + +define i32 @outer16(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer16( +; CHECK-NOT: call i32 @inner16 + %A = alloca i32 + %C = call i32 @inner16(i1 false, i32* %A, i32* %ptr) + ret i32 %C +} + +define i32 @inner16(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %load +}