Please use GitHub pull requests for new patches. Avoid migrating existing patches. Phabricator shutdown timeline
Changeset View
Changeset View
Standalone View
Standalone View
llvm/trunk/lib/Transforms/Scalar/GVNSink.cpp
//===- GVNSink.cpp - sink expressions into successors -------------------===// | |||||
// | |||||
// The LLVM Compiler Infrastructure | |||||
// | |||||
// This file is distributed under the University of Illinois Open Source | |||||
// License. See LICENSE.TXT for details. | |||||
// | |||||
//===----------------------------------------------------------------------===// | |||||
// | |||||
/// \file GVNSink.cpp | |||||
/// This pass attempts to sink instructions into successors, reducing static | |||||
/// instruction count and enabling if-conversion. | |||||
/// | |||||
/// We use a variant of global value numbering to decide what can be sunk. | |||||
/// Consider: | |||||
/// | |||||
/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] | |||||
/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] | |||||
/// \ / | |||||
/// [ %e = phi i32 %a2, %c2 ] | |||||
/// [ add i32 %e, 4 ] | |||||
/// | |||||
/// | |||||
/// GVN would number %a1 and %c1 differently because they compute different | |||||
/// results - the VN of an instruction is a function of its opcode and the | |||||
/// transitive closure of its operands. This is the key property for hoisting | |||||
/// and CSE. | |||||
/// | |||||
/// What we want when sinking however is for a numbering that is a function of | |||||
/// the *uses* of an instruction, which allows us to answer the question "if I | |||||
/// replace %a1 with %c1, will it contribute in an equivalent way to all | |||||
/// successive instructions?". The PostValueTable class in GVN provides this | |||||
/// mapping. | |||||
/// | |||||
//===----------------------------------------------------------------------===// | |||||
#include "llvm/ADT/DenseMap.h" | |||||
#include "llvm/ADT/DenseMapInfo.h" | |||||
#include "llvm/ADT/DenseSet.h" | |||||
#include "llvm/ADT/Hashing.h" | |||||
#include "llvm/ADT/Optional.h" | |||||
#include "llvm/ADT/PostOrderIterator.h" | |||||
#include "llvm/ADT/SCCIterator.h" | |||||
#include "llvm/ADT/SmallPtrSet.h" | |||||
#include "llvm/ADT/Statistic.h" | |||||
#include "llvm/ADT/StringExtras.h" | |||||
#include "llvm/Analysis/GlobalsModRef.h" | |||||
#include "llvm/Analysis/MemorySSA.h" | |||||
#include "llvm/Analysis/PostDominators.h" | |||||
#include "llvm/Analysis/TargetTransformInfo.h" | |||||
#include "llvm/Analysis/ValueTracking.h" | |||||
#include "llvm/IR/Instructions.h" | |||||
#include "llvm/IR/Verifier.h" | |||||
#include "llvm/Support/MathExtras.h" | |||||
#include "llvm/Transforms/Scalar.h" | |||||
#include "llvm/Transforms/Scalar/GVN.h" | |||||
#include "llvm/Transforms/Scalar/GVNExpression.h" | |||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||||
#include "llvm/Transforms/Utils/Local.h" | |||||
#include <unordered_set> | |||||
using namespace llvm; | |||||
#define DEBUG_TYPE "gvn-sink" | |||||
STATISTIC(NumRemoved, "Number of instructions removed"); | |||||
namespace { | |||||
static bool isMemoryInst(const Instruction *I) { | |||||
return isa<LoadInst>(I) || isa<StoreInst>(I) || | |||||
(isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) || | |||||
(isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory()); | |||||
} | |||||
/// Iterates through instructions in a set of blocks in reverse order from the | |||||
/// first non-terminator. For example (assume all blocks have size n): | |||||
/// LockstepReverseIterator I([B1, B2, B3]); | |||||
/// *I-- = [B1[n], B2[n], B3[n]]; | |||||
/// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; | |||||
/// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; | |||||
/// ... | |||||
/// | |||||
/// It continues until all blocks have been exhausted. Use \c getActiveBlocks() | |||||
/// to | |||||
/// determine which blocks are still going and the order they appear in the | |||||
/// list returned by operator*. | |||||
class LockstepReverseIterator { | |||||
ArrayRef<BasicBlock *> Blocks; | |||||
SmallPtrSet<BasicBlock *, 4> ActiveBlocks; | |||||
SmallVector<Instruction *, 4> Insts; | |||||
bool Fail; | |||||
public: | |||||
LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) { | |||||
reset(); | |||||
} | |||||
void reset() { | |||||
Fail = false; | |||||
ActiveBlocks.clear(); | |||||
for (BasicBlock *BB : Blocks) | |||||
ActiveBlocks.insert(BB); | |||||
Insts.clear(); | |||||
for (BasicBlock *BB : Blocks) { | |||||
if (BB->size() <= 1) { | |||||
// Block wasn't big enough - only contained a terminator. | |||||
ActiveBlocks.erase(BB); | |||||
continue; | |||||
} | |||||
Insts.push_back(BB->getTerminator()->getPrevNode()); | |||||
} | |||||
if (Insts.empty()) | |||||
Fail = true; | |||||
} | |||||
bool isValid() const { return !Fail; } | |||||
ArrayRef<Instruction *> operator*() const { return Insts; } | |||||
SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; } | |||||
void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) { | |||||
for (auto II = Insts.begin(); II != Insts.end();) { | |||||
if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == | |||||
Blocks.end()) { | |||||
ActiveBlocks.erase((*II)->getParent()); | |||||
II = Insts.erase(II); | |||||
} else { | |||||
++II; | |||||
} | |||||
} | |||||
} | |||||
void operator--() { | |||||
if (Fail) | |||||
return; | |||||
SmallVector<Instruction *, 4> NewInsts; | |||||
for (auto *Inst : Insts) { | |||||
if (Inst == &Inst->getParent()->front()) | |||||
ActiveBlocks.erase(Inst->getParent()); | |||||
else | |||||
NewInsts.push_back(Inst->getPrevNode()); | |||||
} | |||||
if (NewInsts.empty()) { | |||||
Fail = true; | |||||
return; | |||||
} | |||||
Insts = NewInsts; | |||||
} | |||||
}; | |||||
//===----------------------------------------------------------------------===// | |||||
/// Candidate solution for sinking. There may be different ways to | |||||
/// sink instructions, differing in the number of instructions sunk, | |||||
/// the number of predecessors sunk from and the number of PHIs | |||||
/// required. | |||||
struct SinkingInstructionCandidate { | |||||
unsigned NumBlocks; | |||||
unsigned NumInstructions; | |||||
unsigned NumPHIs; | |||||
unsigned NumMemoryInsts; | |||||
int Cost = -1; | |||||
SmallVector<BasicBlock *, 4> Blocks; | |||||
void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { | |||||
unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; | |||||
unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0; | |||||
Cost = (NumInstructions * (NumBlocks - 1)) - | |||||
(NumExtraPHIs * | |||||
NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. | |||||
- SplitEdgeCost; | |||||
} | |||||
bool operator>=(const SinkingInstructionCandidate &Other) const { | |||||
return Cost >= Other.Cost; | |||||
} | |||||
}; | |||||
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, | |||||
const SinkingInstructionCandidate &C) { | |||||
OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks | |||||
<< " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">"; | |||||
return OS; | |||||
} | |||||
//===----------------------------------------------------------------------===// | |||||
/// Describes a PHI node that may or may not exist. These track the PHIs | |||||
/// that must be created if we sunk a sequence of instructions. It provides | |||||
/// a hash function for efficient equality comparisons. | |||||
class ModelledPHI { | |||||
SmallVector<Value *, 4> Values; | |||||
SmallVector<BasicBlock *, 4> Blocks; | |||||
public: | |||||
ModelledPHI() {} | |||||
ModelledPHI(const PHINode *PN) { | |||||
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) | |||||
Blocks.push_back(PN->getIncomingBlock(I)); | |||||
std::sort(Blocks.begin(), Blocks.end()); | |||||
// This assumes the PHI is already well-formed and there aren't conflicting | |||||
// incoming values for the same block. | |||||
for (auto *B : Blocks) | |||||
Values.push_back(PN->getIncomingValueForBlock(B)); | |||||
} | |||||
/// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI | |||||
/// without the same ID. | |||||
/// \note This is specifically for DenseMapInfo - do not use this! | |||||
static ModelledPHI createDummy(unsigned ID) { | |||||
ModelledPHI M; | |||||
M.Values.push_back(reinterpret_cast<Value*>(ID)); | |||||
return M; | |||||
} | |||||
/// Create a PHI from an array of incoming values and incoming blocks. | |||||
template <typename VArray, typename BArray> | |||||
ModelledPHI(const VArray &V, const BArray &B) { | |||||
std::copy(V.begin(), V.end(), std::back_inserter(Values)); | |||||
std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); | |||||
} | |||||
/// Create a PHI from [I[OpNum] for I in Insts]. | |||||
template <typename BArray> | |||||
ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) { | |||||
std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); | |||||
for (auto *I : Insts) | |||||
Values.push_back(I->getOperand(OpNum)); | |||||
} | |||||
/// Restrict the PHI's contents down to only \c NewBlocks. | |||||
/// \c NewBlocks must be a subset of \c this->Blocks. | |||||
void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) { | |||||
auto BI = Blocks.begin(); | |||||
auto VI = Values.begin(); | |||||
while (BI != Blocks.end()) { | |||||
assert(VI != Values.end()); | |||||
if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) == | |||||
NewBlocks.end()) { | |||||
BI = Blocks.erase(BI); | |||||
VI = Values.erase(VI); | |||||
} else { | |||||
++BI; | |||||
++VI; | |||||
} | |||||
} | |||||
assert(Blocks.size() == NewBlocks.size()); | |||||
} | |||||
ArrayRef<Value *> getValues() const { return Values; } | |||||
bool areAllIncomingValuesSame() const { | |||||
return all_of(Values, [&](Value *V) { return V == Values[0]; }); | |||||
} | |||||
bool areAllIncomingValuesSameType() const { | |||||
return all_of( | |||||
Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); | |||||
} | |||||
bool areAnyIncomingValuesConstant() const { | |||||
return any_of(Values, [&](Value *V) { return isa<Constant>(V); }); | |||||
} | |||||
// Hash functor | |||||
unsigned hash() const { | |||||
return (unsigned)hash_combine_range(Values.begin(), Values.end()); | |||||
} | |||||
bool operator==(const ModelledPHI &Other) const { | |||||
return Values == Other.Values && Blocks == Other.Blocks; | |||||
} | |||||
}; | |||||
template <typename ModelledPHI> struct DenseMapInfo { | |||||
static inline ModelledPHI &getEmptyKey() { | |||||
static ModelledPHI Dummy = ModelledPHI::createDummy(0); | |||||
return Dummy; | |||||
} | |||||
static inline ModelledPHI &getTombstoneKey() { | |||||
static ModelledPHI Dummy = ModelledPHI::createDummy(1); | |||||
return Dummy; | |||||
} | |||||
static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } | |||||
static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { | |||||
return LHS == RHS; | |||||
} | |||||
}; | |||||
typedef DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>> ModelledPHISet; | |||||
//===----------------------------------------------------------------------===// | |||||
// ValueTable | |||||
//===----------------------------------------------------------------------===// | |||||
// This is a value number table where the value number is a function of the | |||||
// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know | |||||
// that the program would be equivalent if we replaced A with PHI(A, B). | |||||
//===----------------------------------------------------------------------===// | |||||
/// A GVN expression describing how an instruction is used. The operands | |||||
/// field of BasicExpression is used to store uses, not operands. | |||||
/// | |||||
/// This class also contains fields for discriminators used when determining | |||||
/// equivalence of instructions with sideeffects. | |||||
class InstructionUseExpr : public GVNExpression::BasicExpression { | |||||
unsigned MemoryUseOrder = -1; | |||||
bool Volatile = false; | |||||
public: | |||||
InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R, | |||||
BumpPtrAllocator &A) | |||||
: GVNExpression::BasicExpression(I->getNumUses()) { | |||||
allocateOperands(R, A); | |||||
setOpcode(I->getOpcode()); | |||||
setType(I->getType()); | |||||
for (auto &U : I->uses()) | |||||
op_push_back(U.getUser()); | |||||
std::sort(op_begin(), op_end()); | |||||
} | |||||
void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } | |||||
void setVolatile(bool V) { Volatile = V; } | |||||
virtual hash_code getHashValue() const { | |||||
return hash_combine(GVNExpression::BasicExpression::getHashValue(), | |||||
MemoryUseOrder, Volatile); | |||||
} | |||||
template <typename Function> hash_code getHashValue(Function MapFn) { | |||||
hash_code H = | |||||
hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); | |||||
for (auto *V : operands()) | |||||
H = hash_combine(H, MapFn(V)); | |||||
return H; | |||||
} | |||||
}; | |||||
class ValueTable { | |||||
DenseMap<Value *, uint32_t> ValueNumbering; | |||||
DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering; | |||||
DenseMap<size_t, uint32_t> HashNumbering; | |||||
BumpPtrAllocator Allocator; | |||||
ArrayRecycler<Value *> Recycler; | |||||
uint32_t nextValueNumber; | |||||
/// Create an expression for I based on its opcode and its uses. If I | |||||
/// touches or reads memory, the expression is also based upon its memory | |||||
/// order - see \c getMemoryUseOrder(). | |||||
InstructionUseExpr *createExpr(Instruction *I) { | |||||
InstructionUseExpr *E = | |||||
new (Allocator) InstructionUseExpr(I, Recycler, Allocator); | |||||
if (isMemoryInst(I)) | |||||
E->setMemoryUseOrder(getMemoryUseOrder(I)); | |||||
if (CmpInst *C = dyn_cast<CmpInst>(I)) { | |||||
CmpInst::Predicate Predicate = C->getPredicate(); | |||||
E->setOpcode((C->getOpcode() << 8) | Predicate); | |||||
} | |||||
return E; | |||||
} | |||||
/// Helper to compute the value number for a memory instruction | |||||
/// (LoadInst/StoreInst), including checking the memory ordering and | |||||
/// volatility. | |||||
template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) { | |||||
if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic()) | |||||
return nullptr; | |||||
InstructionUseExpr *E = createExpr(I); | |||||
E->setVolatile(I->isVolatile()); | |||||
return E; | |||||
} | |||||
public: | |||||
/// Returns the value number for the specified value, assigning | |||||
/// it a new number if it did not have one before. | |||||
uint32_t lookupOrAdd(Value *V) { | |||||
auto VI = ValueNumbering.find(V); | |||||
if (VI != ValueNumbering.end()) | |||||
return VI->second; | |||||
if (!isa<Instruction>(V)) { | |||||
ValueNumbering[V] = nextValueNumber; | |||||
return nextValueNumber++; | |||||
} | |||||
Instruction *I = cast<Instruction>(V); | |||||
InstructionUseExpr *exp = nullptr; | |||||
switch (I->getOpcode()) { | |||||
case Instruction::Load: | |||||
exp = createMemoryExpr(cast<LoadInst>(I)); | |||||
break; | |||||
case Instruction::Store: | |||||
exp = createMemoryExpr(cast<StoreInst>(I)); | |||||
break; | |||||
case Instruction::Call: | |||||
case Instruction::Invoke: | |||||
case Instruction::Add: | |||||
case Instruction::FAdd: | |||||
case Instruction::Sub: | |||||
case Instruction::FSub: | |||||
case Instruction::Mul: | |||||
case Instruction::FMul: | |||||
case Instruction::UDiv: | |||||
case Instruction::SDiv: | |||||
case Instruction::FDiv: | |||||
case Instruction::URem: | |||||
case Instruction::SRem: | |||||
case Instruction::FRem: | |||||
case Instruction::Shl: | |||||
case Instruction::LShr: | |||||
case Instruction::AShr: | |||||
case Instruction::And: | |||||
case Instruction::Or: | |||||
case Instruction::Xor: | |||||
case Instruction::ICmp: | |||||
case Instruction::FCmp: | |||||
case Instruction::Trunc: | |||||
case Instruction::ZExt: | |||||
case Instruction::SExt: | |||||
case Instruction::FPToUI: | |||||
case Instruction::FPToSI: | |||||
case Instruction::UIToFP: | |||||
case Instruction::SIToFP: | |||||
case Instruction::FPTrunc: | |||||
case Instruction::FPExt: | |||||
case Instruction::PtrToInt: | |||||
case Instruction::IntToPtr: | |||||
case Instruction::BitCast: | |||||
case Instruction::Select: | |||||
case Instruction::ExtractElement: | |||||
case Instruction::InsertElement: | |||||
case Instruction::ShuffleVector: | |||||
case Instruction::InsertValue: | |||||
case Instruction::GetElementPtr: | |||||
exp = createExpr(I); | |||||
break; | |||||
default: | |||||
break; | |||||
} | |||||
if (!exp) { | |||||
ValueNumbering[V] = nextValueNumber; | |||||
return nextValueNumber++; | |||||
} | |||||
uint32_t e = ExpressionNumbering[exp]; | |||||
if (!e) { | |||||
hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); | |||||
auto I = HashNumbering.find(H); | |||||
if (I != HashNumbering.end()) { | |||||
e = I->second; | |||||
} else { | |||||
e = nextValueNumber++; | |||||
HashNumbering[H] = e; | |||||
ExpressionNumbering[exp] = e; | |||||
} | |||||
} | |||||
ValueNumbering[V] = e; | |||||
return e; | |||||
} | |||||
/// Returns the value number of the specified value. Fails if the value has | |||||
/// not yet been numbered. | |||||
uint32_t lookup(Value *V) const { | |||||
auto VI = ValueNumbering.find(V); | |||||
assert(VI != ValueNumbering.end() && "Value not numbered?"); | |||||
return VI->second; | |||||
} | |||||
/// Removes all value numberings and resets the value table. | |||||
void clear() { | |||||
ValueNumbering.clear(); | |||||
ExpressionNumbering.clear(); | |||||
HashNumbering.clear(); | |||||
Recycler.clear(Allocator); | |||||
nextValueNumber = 1; | |||||
} | |||||
ValueTable() : nextValueNumber(1) {} | |||||
/// \c Inst uses or touches memory. Return an ID describing the memory state | |||||
/// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), | |||||
/// the exact same memory operations happen after I1 and I2. | |||||
/// | |||||
/// This is a very hard problem in general, so we use domain-specific | |||||
/// knowledge that we only ever check for equivalence between blocks sharing a | |||||
/// single immediate successor that is common, and when determining if I1 == | |||||
/// I2 we will have already determined that next(I1) == next(I2). This | |||||
/// inductive property allows us to simply return the value number of the next | |||||
/// instruction that defines memory. | |||||
uint32_t getMemoryUseOrder(Instruction *Inst) { | |||||
auto *BB = Inst->getParent(); | |||||
for (auto I = std::next(Inst->getIterator()), E = BB->end(); | |||||
I != E && !I->isTerminator(); ++I) { | |||||
if (!isMemoryInst(&*I)) | |||||
continue; | |||||
if (isa<LoadInst>(&*I)) | |||||
continue; | |||||
CallInst *CI = dyn_cast<CallInst>(&*I); | |||||
if (CI && CI->onlyReadsMemory()) | |||||
continue; | |||||
InvokeInst *II = dyn_cast<InvokeInst>(&*I); | |||||
if (II && II->onlyReadsMemory()) | |||||
continue; | |||||
return lookupOrAdd(&*I); | |||||
} | |||||
return 0; | |||||
} | |||||
}; | |||||
//===----------------------------------------------------------------------===// | |||||
class GVNSink { | |||||
public: | |||||
GVNSink() : VN() {} | |||||
bool run(Function &F) { | |||||
DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); | |||||
unsigned NumSunk = 0; | |||||
ReversePostOrderTraversal<Function*> RPOT(&F); | |||||
for (auto *N : RPOT) | |||||
NumSunk += sinkBB(N); | |||||
return NumSunk > 0; | |||||
} | |||||
private: | |||||
ValueTable VN; | |||||
bool isInstructionBlacklisted(Instruction *I) { | |||||
// These instructions may change or break semantics if moved. | |||||
if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) || | |||||
I->getType()->isTokenTy()) | |||||
return true; | |||||
return false; | |||||
} | |||||
/// The main heuristic function. Analyze the set of instructions pointed to by | |||||
/// LRI and return a candidate solution if these instructions can be sunk, or | |||||
/// None otherwise. | |||||
Optional<SinkingInstructionCandidate> analyzeInstructionForSinking( | |||||
LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, | |||||
ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents); | |||||
/// Create a ModelledPHI for each PHI in BB, adding to PHIs. | |||||
void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, | |||||
SmallPtrSetImpl<Value *> &PHIContents) { | |||||
for (auto &I : *BB) { | |||||
auto *PN = dyn_cast<PHINode>(&I); | |||||
if (!PN) | |||||
return; | |||||
auto MPHI = ModelledPHI(PN); | |||||
PHIs.insert(MPHI); | |||||
for (auto *V : MPHI.getValues()) | |||||
PHIContents.insert(V); | |||||
} | |||||
} | |||||
/// The main instruction sinking driver. Set up state and try and sink | |||||
/// instructions into BBEnd from its predecessors. | |||||
unsigned sinkBB(BasicBlock *BBEnd); | |||||
/// Perform the actual mechanics of sinking an instruction from Blocks into | |||||
/// BBEnd, which is their only successor. | |||||
void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd); | |||||
/// Remove PHIs that all have the same incoming value. | |||||
void foldPointlessPHINodes(BasicBlock *BB) { | |||||
auto I = BB->begin(); | |||||
while (PHINode *PN = dyn_cast<PHINode>(I++)) { | |||||
if (!all_of(PN->incoming_values(), | |||||
[&](const Value *V) { return V == PN->getIncomingValue(0); })) | |||||
continue; | |||||
if (PN->getIncomingValue(0) != PN) | |||||
PN->replaceAllUsesWith(PN->getIncomingValue(0)); | |||||
else | |||||
PN->replaceAllUsesWith(UndefValue::get(PN->getType())); | |||||
PN->eraseFromParent(); | |||||
} | |||||
} | |||||
}; | |||||
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking( | |||||
LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, | |||||
ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) { | |||||
auto Insts = *LRI; | |||||
DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I | |||||
: Insts) { | |||||
I->dump(); | |||||
} dbgs() << " ]\n";); | |||||
DenseMap<uint32_t, unsigned> VNums; | |||||
for (auto *I : Insts) { | |||||
uint32_t N = VN.lookupOrAdd(I); | |||||
DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); | |||||
if (N == ~0U) | |||||
return None; | |||||
VNums[N]++; | |||||
} | |||||
unsigned VNumToSink = | |||||
std::max_element(VNums.begin(), VNums.end(), | |||||
[](const std::pair<uint32_t, unsigned> &I, | |||||
const std::pair<uint32_t, unsigned> &J) { | |||||
return I.second < J.second; | |||||
}) | |||||
->first; | |||||
if (VNums[VNumToSink] == 1) | |||||
// Can't sink anything! | |||||
return None; | |||||
// Now restrict the number of incoming blocks down to only those with | |||||
// VNumToSink. | |||||
auto &ActivePreds = LRI.getActiveBlocks(); | |||||
unsigned InitialActivePredSize = ActivePreds.size(); | |||||
SmallVector<Instruction *, 4> NewInsts; | |||||
for (auto *I : Insts) { | |||||
if (VN.lookup(I) != VNumToSink) | |||||
ActivePreds.erase(I->getParent()); | |||||
else | |||||
NewInsts.push_back(I); | |||||
} | |||||
for (auto *I : NewInsts) | |||||
if (isInstructionBlacklisted(I)) | |||||
return None; | |||||
// If we've restricted the incoming blocks, restrict all needed PHIs also | |||||
// to that set. | |||||
bool RecomputePHIContents = false; | |||||
if (ActivePreds.size() != InitialActivePredSize) { | |||||
ModelledPHISet NewNeededPHIs; | |||||
for (auto P : NeededPHIs) { | |||||
P.restrictToBlocks(ActivePreds); | |||||
NewNeededPHIs.insert(P); | |||||
} | |||||
NeededPHIs = NewNeededPHIs; | |||||
LRI.restrictToBlocks(ActivePreds); | |||||
RecomputePHIContents = true; | |||||
} | |||||
// The sunk instruction's results. | |||||
ModelledPHI NewPHI(NewInsts, ActivePreds); | |||||
// Does sinking this instruction render previous PHIs redundant? | |||||
if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) { | |||||
NeededPHIs.erase(NewPHI); | |||||
RecomputePHIContents = true; | |||||
} | |||||
if (RecomputePHIContents) { | |||||
// The needed PHIs have changed, so recompute the set of all needed | |||||
// values. | |||||
PHIContents.clear(); | |||||
for (auto &PHI : NeededPHIs) | |||||
PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); | |||||
} | |||||
// Is this instruction required by a later PHI that doesn't match this PHI? | |||||
// if so, we can't sink this instruction. | |||||
for (auto *V : NewPHI.getValues()) | |||||
if (PHIContents.count(V)) | |||||
// V exists in this PHI, but the whole PHI is different to NewPHI | |||||
// (else it would have been removed earlier). We cannot continue | |||||
// because this isn't representable. | |||||
return None; | |||||
// Which operands need PHIs? | |||||
// FIXME: If any of these fail, we should partition up the candidates to | |||||
// try and continue making progress. | |||||
Instruction *I0 = NewInsts[0]; | |||||
for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) { | |||||
ModelledPHI PHI(NewInsts, OpNum, ActivePreds); | |||||
if (PHI.areAllIncomingValuesSame()) | |||||
continue; | |||||
if (!canReplaceOperandWithVariable(I0, OpNum)) | |||||
// We can 't create a PHI from this instruction! | |||||
return None; | |||||
if (NeededPHIs.count(PHI)) | |||||
continue; | |||||
if (!PHI.areAllIncomingValuesSameType()) | |||||
return None; | |||||
// Don't create indirect calls! The called value is the final operand. | |||||
if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 && | |||||
PHI.areAnyIncomingValuesConstant()) | |||||
return None; | |||||
NeededPHIs.reserve(NeededPHIs.size()); | |||||
NeededPHIs.insert(PHI); | |||||
PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); | |||||
} | |||||
if (isMemoryInst(NewInsts[0])) | |||||
++MemoryInstNum; | |||||
SinkingInstructionCandidate Cand; | |||||
Cand.NumInstructions = ++InstNum; | |||||
Cand.NumMemoryInsts = MemoryInstNum; | |||||
Cand.NumBlocks = ActivePreds.size(); | |||||
Cand.NumPHIs = NeededPHIs.size(); | |||||
for (auto *C : ActivePreds) | |||||
Cand.Blocks.push_back(C); | |||||
return Cand; | |||||
} | |||||
unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { | |||||
DEBUG(dbgs() << "GVNSink: running on basic block "; | |||||
BBEnd->printAsOperand(dbgs()); dbgs() << "\n"); | |||||
SmallVector<BasicBlock *, 4> Preds; | |||||
for (auto *B : predecessors(BBEnd)) { | |||||
auto *T = B->getTerminator(); | |||||
if (isa<BranchInst>(T) || isa<SwitchInst>(T)) | |||||
Preds.push_back(B); | |||||
else | |||||
return 0; | |||||
} | |||||
if (Preds.size() < 2) | |||||
return 0; | |||||
std::sort(Preds.begin(), Preds.end()); | |||||
unsigned NumOrigPreds = Preds.size(); | |||||
// We can only sink instructions through unconditional branches. | |||||
for (auto I = Preds.begin(); I != Preds.end();) { | |||||
if ((*I)->getTerminator()->getNumSuccessors() != 1) | |||||
I = Preds.erase(I); | |||||
else | |||||
++I; | |||||
} | |||||
LockstepReverseIterator LRI(Preds); | |||||
SmallVector<SinkingInstructionCandidate, 4> Candidates; | |||||
unsigned InstNum = 0, MemoryInstNum = 0; | |||||
ModelledPHISet NeededPHIs; | |||||
SmallPtrSet<Value *, 4> PHIContents; | |||||
analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents); | |||||
unsigned NumOrigPHIs = NeededPHIs.size(); | |||||
while (LRI.isValid()) { | |||||
auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum, | |||||
NeededPHIs, PHIContents); | |||||
if (!Cand) | |||||
break; | |||||
Cand->calculateCost(NumOrigPHIs, Preds.size()); | |||||
Candidates.emplace_back(*Cand); | |||||
--LRI; | |||||
} | |||||
std::stable_sort( | |||||
Candidates.begin(), Candidates.end(), | |||||
[](const SinkingInstructionCandidate &A, | |||||
const SinkingInstructionCandidate &B) { return A >= B; }); | |||||
DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C | |||||
: Candidates) dbgs() | |||||
<< " " << C << "\n";); | |||||
// Pick the top candidate, as long it is positive! | |||||
if (Candidates.empty() || Candidates.front().Cost <= 0) | |||||
return 0; | |||||
auto C = Candidates.front(); | |||||
DEBUG(dbgs() << " -- Sinking: " << C << "\n"); | |||||
BasicBlock *InsertBB = BBEnd; | |||||
if (C.Blocks.size() < NumOrigPreds) { | |||||
DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs()); | |||||
dbgs() << "\n"); | |||||
InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split"); | |||||
if (!InsertBB) { | |||||
DEBUG(dbgs() << " -- FAILED to split edge!\n"); | |||||
// Edge couldn't be split. | |||||
return 0; | |||||
} | |||||
} | |||||
for (unsigned I = 0; I < C.NumInstructions; ++I) | |||||
sinkLastInstruction(C.Blocks, InsertBB); | |||||
return C.NumInstructions; | |||||
} | |||||
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, | |||||
BasicBlock *BBEnd) { | |||||
SmallVector<Instruction *, 4> Insts; | |||||
for (BasicBlock *BB : Blocks) | |||||
Insts.push_back(BB->getTerminator()->getPrevNode()); | |||||
Instruction *I0 = Insts.front(); | |||||
SmallVector<Value *, 4> NewOperands; | |||||
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { | |||||
bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { | |||||
return I->getOperand(O) != I0->getOperand(O); | |||||
}); | |||||
if (!NeedPHI) { | |||||
NewOperands.push_back(I0->getOperand(O)); | |||||
continue; | |||||
} | |||||
// Create a new PHI in the successor block and populate it. | |||||
auto *Op = I0->getOperand(O); | |||||
assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); | |||||
auto *PN = PHINode::Create(Op->getType(), Insts.size(), | |||||
Op->getName() + ".sink", &BBEnd->front()); | |||||
for (auto *I : Insts) | |||||
PN->addIncoming(I->getOperand(O), I->getParent()); | |||||
NewOperands.push_back(PN); | |||||
} | |||||
// Arbitrarily use I0 as the new "common" instruction; remap its operands | |||||
// and move it to the start of the successor block. | |||||
for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) | |||||
I0->getOperandUse(O).set(NewOperands[O]); | |||||
I0->moveBefore(&*BBEnd->getFirstInsertionPt()); | |||||
// Update metadata and IR flags. | |||||
for (auto *I : Insts) | |||||
if (I != I0) { | |||||
combineMetadataForCSE(I0, I); | |||||
I0->andIRFlags(I); | |||||
} | |||||
for (auto *I : Insts) | |||||
if (I != I0) | |||||
I->replaceAllUsesWith(I0); | |||||
foldPointlessPHINodes(BBEnd); | |||||
// Finally nuke all instructions apart from the common instruction. | |||||
for (auto *I : Insts) | |||||
if (I != I0) | |||||
I->eraseFromParent(); | |||||
NumRemoved += Insts.size() - 1; | |||||
} | |||||
//////////////////////////////////////////////////////////////////////////////// | |||||
// Pass machinery / boilerplate | |||||
class GVNSinkLegacyPass : public FunctionPass { | |||||
public: | |||||
static char ID; | |||||
GVNSinkLegacyPass() : FunctionPass(ID) { | |||||
initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); | |||||
} | |||||
bool runOnFunction(Function &F) override { | |||||
if (skipFunction(F)) | |||||
return false; | |||||
GVNSink G; | |||||
return G.run(F); | |||||
} | |||||
void getAnalysisUsage(AnalysisUsage &AU) const override { | |||||
AU.addPreserved<GlobalsAAWrapperPass>(); | |||||
} | |||||
}; | |||||
} // namespace | |||||
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { | |||||
GVNSink G; | |||||
if (!G.run(F)) | |||||
return PreservedAnalyses::all(); | |||||
PreservedAnalyses PA; | |||||
PA.preserve<GlobalsAA>(); | |||||
return PA; | |||||
} | |||||
char GVNSinkLegacyPass::ID = 0; | |||||
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", | |||||
"Early GVN sinking of Expressions", false, false) | |||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) | |||||
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) | |||||
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", | |||||
"Early GVN sinking of Expressions", false, false) | |||||
FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } |