Index: include/llvm/InitializePasses.h
===================================================================
--- include/llvm/InitializePasses.h
+++ include/llvm/InitializePasses.h
@@ -126,6 +126,7 @@
 void initializeEfficiencySanitizerPass(PassRegistry&);
 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &);
 void initializeGVNHoistLegacyPassPass(PassRegistry &);
+void initializeGVNSinkLegacyPassPass(PassRegistry &);
 void initializeExpandISelPseudosPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);
 void initializeExternalAAWrapperPassPass(PassRegistry&);
Index: include/llvm/Transforms/Scalar.h
===================================================================
--- include/llvm/Transforms/Scalar.h
+++ include/llvm/Transforms/Scalar.h
@@ -333,6 +333,12 @@
 //===----------------------------------------------------------------------===//
 //
+// GVNSink - This pass uses GVN to sink equivalent expressions into successors.
+//
+FunctionPass *createGVNSinkPass();
+
+//===----------------------------------------------------------------------===//
+//
 // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
 // are hoisted into the header, while stores sink into the footer.
 //
Index: include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- include/llvm/Transforms/Scalar/GVN.h
+++ include/llvm/Transforms/Scalar/GVN.h
@@ -64,6 +64,7 @@
 /// as an efficient mechanism to determine the expression-wise equivalence of
 /// two values.
 class ValueTable {
+protected:
   DenseMap<Value *, uint32_t> valueNumbering;
   DenseMap<Expression, uint32_t> expressionNumbering;
   AliasAnalysis *AA;
@@ -72,17 +73,20 @@
   uint32_t nextValueNumber;
 
-  Expression createExpr(Instruction *I);
-  Expression createCmpExpr(unsigned Opcode, CmpInst::Predicate Predicate,
-                           Value *LHS, Value *RHS);
-  Expression createExtractvalueExpr(ExtractValueInst *EI);
-  uint32_t lookupOrAddCall(CallInst *C);
-
+  virtual Expression createExpr(Instruction *I);
+  virtual Expression createCmpExpr(unsigned Opcode,
+                                   CmpInst::Predicate Predicate,
+                                   Value *LHS, Value *RHS);
+  virtual Expression createExtractvalueExpr(ExtractValueInst *EI);
+  virtual uint32_t lookupOrAddCall(CallInst *C);
+  virtual uint32_t lookupOrAddLoad(LoadInst *LI);
+  virtual uint32_t lookupOrAddStore(StoreInst *SI);
+  void modify(Value *V, uint32_t num);
+
 public:
   ValueTable();
   ValueTable(const ValueTable &Arg);
   ValueTable(ValueTable &&Arg);
-  ~ValueTable();
+  virtual ~ValueTable();
 
   uint32_t lookupOrAdd(Value *V);
   uint32_t lookup(Value *V) const;
@@ -100,6 +104,25 @@
   void verifyRemoved(const Value *) const;
 };
 
+/// This table computes a corollary function to ValueTable. Where ValueTable
+/// computes a value number as a function of an operation and its transitive
+/// operands, this computes one as a function of an operation and its
+/// transitive *uses*.
+///
+/// FIXME: Give a fuller description here.
+class PostValueTable : public ValueTable {
+  Expression createExpr(Instruction *I) override;
+  Expression createCmpExpr(unsigned Opcode, CmpInst::Predicate Predicate,
+                           Value *LHS, Value *RHS) override;
+  Expression createExtractvalueExpr(ExtractValueInst *EI) override;
+  uint32_t lookupOrAddCall(CallInst *C) override;
+  uint32_t lookupOrAddLoad(LoadInst *LI) override;
+  uint32_t lookupOrAddStore(StoreInst *SI) override;
+  bool IgnoreMemDeps = false;
+
+public:
+  void setIgnoreMemoryDependencies(bool Ignore) { IgnoreMemDeps = Ignore; }
+  ~PostValueTable() override;
+};
+
 private:
   friend class gvn::GVNLegacyPass;
   friend struct DenseMapInfo<Expression>;
@@ -234,6 +257,11 @@
   /// \brief Run the pass over the function.
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
+/// Uses an "inverted" value numbering to sink similar expressions into successors.
+struct GVNSinkPass : PassInfoMixin<GVNSinkPass> {
+  /// \brief Run the pass over the function.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
 }
Index: include/llvm/Transforms/Utils/MemorySSA.h
===================================================================
--- include/llvm/Transforms/Utils/MemorySSA.h
+++ include/llvm/Transforms/Utils/MemorySSA.h
@@ -477,6 +477,7 @@
   MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
   ~MemorySSA();
 
+  void invalidateAll();
   MemorySSAWalker *getWalker();
 
   /// \brief Given a memory Mod/Ref'ing instruction, get the MemorySSA
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -146,6 +146,10 @@
     "enable-gvn-hoist", cl::init(true), cl::Hidden,
     cl::desc("Enable the GVN hoisting pass (default = on)"));
 
+static cl::opt<bool> EnableGVNSink(
+    "enable-gvn-sink", cl::init(true), cl::Hidden,
+    cl::desc("Enable the GVN sinking pass (default = on)"));
+
 PassManagerBuilder::PassManagerBuilder() {
   OptLevel = 2;
   SizeLevel = 0;
@@ -247,6 +251,8 @@
   FPM.add(createEarlyCSEPass());
   if(EnableGVNHoist)
     FPM.add(createGVNHoistPass());
+  if (EnableGVNSink)
+    FPM.add(createGVNSinkPass());
   FPM.add(createLowerExpectIntrinsicPass());
 }
Index: lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- lib/Transforms/Scalar/CMakeLists.txt
+++ lib/Transforms/Scalar/CMakeLists.txt
@@ -13,6 +13,7 @@
   GuardWidening.cpp
   GVN.cpp
   GVNHoist.cpp
+  GVNSink.cpp
   InductiveRangeCheckElimination.cpp
   IndVarSimplify.cpp
   JumpThreading.cpp
Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -334,6 +334,86 @@
 }
 
 //===----------------------------------------------------------------------===//
+// PostValueTable Internal Functions
+//===----------------------------------------------------------------------===//
+
+GVN::PostValueTable::~PostValueTable() {
+}
+
+GVN::Expression GVN::PostValueTable::createExpr(Instruction *I) {
+  Expression e;
+  e.type = I->getType();
+  e.opcode = I->getOpcode();
+  for (auto &U : I->uses())
+    e.varargs.push_back(lookupOrAdd(U.getUser()));
+  std::sort(e.varargs.begin(), e.varargs.end());
+
+  if (CmpInst *C = dyn_cast<CmpInst>(I)) {
+    CmpInst::Predicate Predicate = C->getPredicate();
+    e.opcode = (C->getOpcode() << 8) | Predicate;
+  } else if (isa<InsertValueInst>(I)) {
+    assert(0 && "Implement!");
+  }
+
+  return e;
+}
+
+GVN::Expression GVN::PostValueTable::createCmpExpr(unsigned Opcode,
+                                                   CmpInst::Predicate Predicate,
+                                                   Value *LHS, Value *RHS) {
+  llvm_unreachable("Not implemented!");
+}
+
+GVN::Expression
+GVN::PostValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
+  return createExpr(EI);
+}
+
+uint32_t GVN::PostValueTable::lookupOrAddCall(CallInst *I) {
+  if (AA->doesNotAccessMemory(I) || IgnoreMemDeps) {
+    Expression exp = createExpr(I);
+    uint32_t &e = expressionNumbering[exp];
+    if (!e) e = nextValueNumber++;
+    valueNumbering[I] = e;
+    return e;
+  } else {
+    valueNumbering[I] = nextValueNumber;
+    return nextValueNumber++;
+  }
+}
+
+uint32_t GVN::PostValueTable::lookupOrAddLoad(LoadInst *I) {
+  if (IgnoreMemDeps) {
+    Expression exp = createExpr(I);
+    uint32_t &e = expressionNumbering[exp];
+    if (!e) e = nextValueNumber++;
+    valueNumbering[I] = e;
+    return e;
+  } else {
+    valueNumbering[I] = nextValueNumber;
+    return nextValueNumber++;
+  }
+}
+
+uint32_t GVN::PostValueTable::lookupOrAddStore(StoreInst *I) {
+  if (IgnoreMemDeps) {
+    Expression exp = createExpr(I);
+    uint32_t &e = expressionNumbering[exp];
+    if (!e) e = nextValueNumber++;
+    valueNumbering[I] = e;
+    return e;
+  } else {
+    valueNumbering[I] = nextValueNumber;
+    return nextValueNumber++;
+  }
+}
+
+//===----------------------------------------------------------------------===//
 // ValueTable External Functions
 //===----------------------------------------------------------------------===//
 
@@ -354,6 +434,10 @@
   valueNumbering.insert(std::make_pair(V, num));
 }
 
+void GVN::ValueTable::modify(Value *V, uint32_t num) {
+  valueNumbering[V] = num;
+}
+
 uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
   if (AA->doesNotAccessMemory(C)) {
     Expression exp = createExpr(C);
@@ -463,6 +547,16 @@
   }
 }
 
+uint32_t GVN::ValueTable::lookupOrAddLoad(LoadInst *LI) {
+  valueNumbering[LI] = nextValueNumber;
+  return nextValueNumber++;
+}
+
+uint32_t GVN::ValueTable::lookupOrAddStore(StoreInst *SI) {
+  valueNumbering[SI] = nextValueNumber;
+  return nextValueNumber++;
+}
+
 /// Returns true if a value number exists for the specified value.
 bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
 
@@ -483,6 +577,10 @@
   switch (I->getOpcode()) {
     case Instruction::Call:
       return lookupOrAddCall(cast<CallInst>(I));
+    case Instruction::Load:
+      return lookupOrAddLoad(cast<LoadInst>(I));
+    case Instruction::Store:
+      return lookupOrAddStore(cast<StoreInst>(I));
     case Instruction::Add:
     case Instruction::FAdd:
    case Instruction::Sub:
Index: lib/Transforms/Scalar/GVNSink.cpp
===================================================================
--- /dev/null
+++ lib/Transforms/Scalar/GVNSink.cpp
@@ -0,0 +1,813 @@
+//===- GVNSink.cpp - sink expressions into successors --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass attempts to sink instructions into successors, reducing static
+// instruction count and enabling if-conversion.
+//
+// We use a variant of global value numbering to decide what can be sunk.
+// Consider:
+//
+//   [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
+//   [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
+//                 \               /
+//            [ %e = phi i32 %a2, %c2 ]
+//            [ add i32 %e, 4         ]
+//
+// GVN would number %a1 and %c1 differently because they compute different
+// results - the VN of an instruction is a function of its opcode and the
+// transitive closure of its operands. This is the key property for hoisting
+// and CSE.
+//
+// What we want when sinking, however, is a numbering that is a function of
+// the *uses* of an instruction, which allows us to answer the question "if I
+// replace %a1 with %c1, will it contribute in an equivalent way to all
+// successive instructions?". The PostValueTable class in GVN provides this
+// mapping.
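+//
+// For example (an informal sketch of the idea, not the exact numbers the
+// implementation assigns): in the diagram above %a2 and %c2 are used only by
+// the PHI %e, so they receive the same post-value number. %a1 and %c1 are in
+// turn numbered by their (now equivalent) uses, so they also compare equal,
+// and the whole two-instruction chain becomes a candidate for sinking.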
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <set>
+#include <unordered_set>
+using namespace llvm;
+
+#define DEBUG_TYPE "gvn-sink"
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+
+namespace {
+
+// FIXME: Invoke.
+static bool isMemoryInst(Instruction *I) {
+  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
+         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
+}
+
+// LockstepReverseIterator - Iterates through instructions in a set of blocks
+// in reverse order from the first non-terminator.
+// For example (assume all blocks have size n):
+//   LockstepReverseIterator I([B1, B2, B3]);
+//   *I-- = [B1[n], B2[n], B3[n]];
+//   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
+//   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
+//   ...
+//
+// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
+// to determine which blocks are still going and the order they appear in the
+// list returned by operator*.
+class LockstepReverseIterator {
+  ArrayRef<BasicBlock *> Blocks;
+  SmallPtrSet<BasicBlock *, 4> ActiveBlocks;
+  SmallVector<Instruction *, 4> Insts;
+  bool Fail;
+
+public:
+  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
+    reset();
+  }
+
+  void reset() {
+    Fail = false;
+    ActiveBlocks.clear();
+    for (auto *BB : Blocks)
+      ActiveBlocks.insert(BB);
+    Insts.clear();
+    for (auto *BB : Blocks) {
+      if (BB->size() <= 1) {
+        // Block wasn't big enough - only contained a terminator.
+        ActiveBlocks.erase(BB);
+        continue;
+      }
+      Insts.push_back(BB->getTerminator()->getPrevNode());
+    }
+    if (Insts.empty())
+      Fail = true;
+  }
+
+  bool isValid() const { return !Fail; }
+  ArrayRef<Instruction *> operator*() const { return Insts; }
+  SmallPtrSet<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
+
+  void restrictToBlocks(SmallPtrSetImpl<BasicBlock *> &Blocks) {
+    for (auto II = Insts.begin(); II != Insts.end();) {
+      if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
+          Blocks.end()) {
+        ActiveBlocks.erase((*II)->getParent());
+        II = Insts.erase(II);
+      } else {
+        ++II;
+      }
+    }
+  }
+
+  void operator--() {
+    if (Fail)
+      return;
+    SmallVector<Instruction *, 4> NewInsts;
+    for (auto *Inst : Insts) {
+      if (Inst == &Inst->getParent()->front())
+        ActiveBlocks.erase(Inst->getParent());
+      else
+        NewInsts.push_back(Inst->getPrevNode());
+    }
+    if (NewInsts.empty()) {
+      Fail = true;
+      return;
+    }
+    Insts = NewInsts;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+
+/// Candidate solution for sinking. There may be different ways to
+/// sink instructions, differing in the number of instructions sunk,
+/// the number of predecessors sunk from and the number of PHIs
+/// required.
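+///
+/// The cost model in calculateCost() below weighs instructions removed
+/// against PHIs created. An illustrative example (numbers invented for
+/// exposition): sinking 3 instructions from 2 blocks at the price of one
+/// extra PHI, with no edge split, scores (3 * (2 - 1)) - (1 * 1) = 2, a
+/// positive (worthwhile) cost.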
+struct SinkingInstructionCandidate {
+  unsigned NumBlocks;
+  unsigned NumInstructions;
+  unsigned NumPHIs;
+  unsigned NumMemoryInsts;
+  int Cost = -1;
+  SmallVector<BasicBlock *, 4> Blocks;
+
+  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
+    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
+    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
+    Cost = (NumInstructions * (NumBlocks - 1)) -
+           (NumExtraPHIs *
+            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
+           - SplitEdgeCost;
+  }
+  bool operator<(const SinkingInstructionCandidate &Other) const {
+    return Cost < Other.Cost;
+  }
+};
+
+llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+                              SinkingInstructionCandidate &C) {
+  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
+     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
+  return OS;
+}
+
+//===----------------------------------------------------------------------===//
+
+/// Describes a PHI node that may or may not exist. These track the PHIs
+/// that must be created if we sunk a sequence of instructions. It provides
+/// a hash function for efficient equality comparisons.
+class ModelledPHI {
+  SmallVector<Value *, 4> Values;
+  SmallVector<BasicBlock *, 4> Blocks;
+
+public:
+  ModelledPHI() {}
+  ModelledPHI(PHINode *PN) {
+    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
+      Blocks.push_back(PN->getIncomingBlock(I));
+    std::stable_sort(Blocks.begin(), Blocks.end());
+
+    // This assumes the PHI is already well-formed and there aren't conflicting
+    // incoming values for the same block.
+    for (auto *B : Blocks)
+      Values.push_back(PN->getIncomingValueForBlock(B));
+  }
+
+  // Create a PHI from an array of incoming values and incoming blocks.
+  template <typename VArray, typename BArray>
+  ModelledPHI(const VArray &V, const BArray &B) {
+    std::copy(V.begin(), V.end(), std::back_inserter(Values));
+    std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+  }
+
+  // Create a PHI from [I[OpNum] for I in Insts].
+  template <typename BArray>
+  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
+    std::copy(B.begin(), B.end(), std::back_inserter(Blocks));
+    for (auto *I : Insts)
+      Values.push_back(I->getOperand(OpNum));
+  }
+
+  // Restrict the PHI's contents down to only \c NewBlocks.
+  // \c NewBlocks must be a subset of \c this->Blocks.
+  void restrictToBlocks(const SmallPtrSetImpl<BasicBlock *> &NewBlocks) {
+    auto BI = Blocks.begin();
+    auto VI = Values.begin();
+    while (BI != Blocks.end()) {
+      assert(VI != Values.end());
+      if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
+          NewBlocks.end()) {
+        BI = Blocks.erase(BI);
+        VI = Values.erase(VI);
+      } else {
+        ++BI;
+        ++VI;
+      }
+    }
+    assert(Blocks.size() == NewBlocks.size());
+  }
+
+  ArrayRef<Value *> getValues() const { return Values; }
+
+  bool areAllIncomingValuesSame() const {
+    return all_of(Values, [&](Value *V) { return V == Values[0]; });
+  }
+
+  // Hash functor.
+  size_t operator()(const ModelledPHI &P) const {
+    return hash_combine_range(P.Values.begin(), P.Values.end());
+  }
+  bool operator==(const ModelledPHI &Other) const {
+    return Values == Other.Values && Blocks == Other.Blocks;
+  }
+};
+typedef std::unordered_set<ModelledPHI, ModelledPHI> ModelledPHISet;
+
+//===----------------------------------------------------------------------===//
+
+/// This is a modified version of GVN's PostValueTable. PostValueTable provides
+/// a value numbering based on how a value is *used* rather than what it uses.
+/// We extend it here to be able to reason about memory-touching operations.
+///
+/// In general it is difficult for GVN to reason about memory ordering, so it
+/// returns conservative results.
+/// However, we know that we will only ever be looking at instructions in
+/// blocks that share a common immediate successor. This allows us to know
+/// that the memory state at the end of the blocks will be identical.
+///
+/// Not only this, but we know that we will only sink instructions that are
+/// equivalent, so to reason about a memory instruction I it is sufficient to
+/// count the number of memory defs following I.
+class ValueTable : private GVN::PostValueTable {
+  MemorySSA *MSSA;
+  enum ValueNumberSpecialCases : uint32_t {
+    VN_Volatile = 1U << 31
+  };
+
+public:
+  typedef GVN::PostValueTable super;
+  using super::clear;
+  using super::lookup;
+
+  ValueTable(MemorySSA *MSSA, AliasAnalysis *AA) : MSSA(MSSA) {
+    setAliasAnalysis(AA);
+    setIgnoreMemoryDependencies(true);
+  }
+
+  uint32_t lookupOrAdd(Instruction *I) {
+    // We search in postorder, so we must not have seen this instruction yet.
+    // This is important as we modify the value number for memory-modifying
+    // instructions.
+    if (exists(I))
+      return lookup(I);
+    uint32_t N = super::lookupOrAdd(I);
+
+    // Volatility is not accounted for in the value numbering, so add it
+    // here. We don't support memory ops that are anything other than
+    // simple or volatile (no memory ordering or atomicity).
+    if (auto *SI = dyn_cast<StoreInst>(I)) {
+      if (isStrongerThanUnordered(SI->getOrdering()) || SI->isAtomic())
+        // Not supported.
+        return ~0U;
+      if (SI->isVolatile())
+        N |= VN_Volatile;
+    } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+      if (isStrongerThanUnordered(LI->getOrdering()) || LI->isAtomic())
+        // Not supported.
+        return ~0U;
+      if (LI->isVolatile())
+        N |= VN_Volatile;
+    }
+
+    if (isMemoryInst(I))
+      N |= getMemoryUseOrder(I) << 24;
+
+    modify(I, N);
+    return N;
+  }
+
+  // Given a memory-using instruction \c Inst, return an ID describing the
+  // memory state at \c Inst.
+  // Because we look for sinking opportunities bottom-up, this returns
+  // the number of MemoryDefs after \c Inst.
+  uint8_t getMemoryUseOrder(const Instruction *Inst) {
+    const auto *AL = MSSA->getBlockAccesses(Inst->getParent());
+    const auto *MA = MSSA->getMemoryAccess(Inst);
+    assert(AL && MA && "No memory access?");
+
+    auto I = std::find_if(AL->begin(), AL->end(),
+                          [&MA](const MemoryAccess &A) { return &A == MA; });
+    unsigned N = 0;
+    for (; I != AL->end(); ++I)
+      if (!isa<MemoryUse>(*I))
+        ++N;
+    return N;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+
+class GVNSink {
+public:
+  GVNSink(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA,
+          MemoryDependenceResults *MD, MemorySSA *MSSA,
+          TargetTransformInfo *TTI, bool OptForMinSize)
+      : VN(MSSA, AA), DT(DT), PDT(PDT), MD(MD), MSSA(MSSA), TTI(TTI),
+        OptForMinSize(OptForMinSize) {}
+
+  bool run(Function &F) {
+    (void)TTI;
+    (void)OptForMinSize;
+
+    DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n");
+
+    // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
+    bool Res = false;
+    while (true) {
+      auto State = sinkExpressions();
+      if (State.first == 0)
+        return Res;
+
+      if (State.second) {
+        // To address a limitation of the current GVN, we need to rerun
+        // sinking after we sank loads or stores, in order to be able to
+        // sink all scalars dependent on the sunk ld/st.
+        VN.clear();
+        MSSA->invalidateAll();
+      }
+      Res = true;
+    }
+
+    return Res;
+  }
+
+private:
+  ValueTable VN;
+  DominatorTree *DT;
+  PostDominatorTree *PDT;
+  MemoryDependenceResults *MD;
+  MemorySSA *MSSA;
+  const TargetTransformInfo *TTI;
+  const bool OptForMinSize;
+
+  bool isInstructionBlacklisted(Instruction *I) {
+    // These instructions may change or break semantics if moved.
+    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+        I->getType()->isTokenTy())
+      return true;
+    // FIXME: We don't support invokes yet.
+    if (isa<InvokeInst>(I))
+      return true;
+    return false;
+  }
+
+  // Is it legal to place a variable in operand \c OpIdx of \c I?
+  // FIXME: This should be promoted to Instruction.
+  bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
+    // We can't have a PHI with a metadata type.
+    if (I->getOperand(OpIdx)->getType()->isMetadataTy())
+      return false;
+
+    // Early exit.
+    if (!isa<Constant>(I->getOperand(OpIdx)))
+      return true;
+
+    switch (I->getOpcode()) {
+    default:
+      return true;
+    case Instruction::Call:
+    case Instruction::Invoke:
+      // FIXME: Many arithmetic intrinsics have no issue taking a
+      // variable, however it's hard to distinguish these from
+      // specials such as @llvm.frameaddress that require a constant.
+      return !isa<IntrinsicInst>(I);
+    case Instruction::ShuffleVector:
+      // Shufflevector masks are constant.
+      return OpIdx != 2;
+    case Instruction::ExtractValue:
+    case Instruction::InsertValue:
+      // All operands apart from the first are constant.
+      return OpIdx == 0;
+    case Instruction::Alloca:
+      return false;
+    case Instruction::GetElementPtr:
+      if (OpIdx == 0)
+        return true;
+      gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1);
+      return !It->isStructTy();
+    }
+  }
+
+  // The main heuristic function. Analyze the set of instructions pointed to by
+  // LRI and return a candidate solution if these instructions can be sunk, or
+  // None otherwise.
+  Optional<SinkingInstructionCandidate>
+  analyzeInstructionForSinking(LockstepReverseIterator &LRI,
+                               unsigned &InstNum, unsigned &MemoryInstNum,
+                               ModelledPHISet &NeededPHIs,
+                               std::set<Value *> &PHIContents) {
+    auto Insts = *LRI;
+    DEBUG(
+      dbgs() << " -- Analyzing instruction set: [\n";
+      for (auto *I : Insts)
+        I->dump();
+      dbgs() << " ]\n";
+    );
+
+    DenseMap<uint32_t, unsigned> VNums;
+    for (auto *I : Insts) {
+      uint32_t N = VN.lookupOrAdd(I);
+      DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n");
+      if (N == ~0U)
+        return None;
+      VNums[N]++;
+    }
+    unsigned VNumToSink =
+        std::max_element(VNums.begin(), VNums.end(),
+                         [](const std::pair<uint32_t, unsigned> &I,
+                            const std::pair<uint32_t, unsigned> &J) {
+                           return I.second < J.second;
+                         })->first;
+
+    if (VNums[VNumToSink] == 1)
+      // Can't sink anything!
+      return None;
+
+    // Now restrict the number of incoming blocks down to only those with
+    // VNumToSink.
+    auto &ActivePreds = LRI.getActiveBlocks();
+    unsigned InitialActivePredSize = ActivePreds.size();
+    SmallVector<Instruction *, 4> NewInsts;
+    for (auto *I : Insts) {
+      if (VN.lookup(I) != VNumToSink)
+        ActivePreds.erase(I->getParent());
+      else
+        NewInsts.push_back(I);
+    }
+    for (auto *I : NewInsts)
+      if (isInstructionBlacklisted(I))
+        return None;
+
+    // If we've restricted the incoming blocks, restrict all needed PHIs also
+    // to that set.
+    // FIXME: Make a testcase for this!
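+    // For example (illustrative): if only two of three incoming blocks
+    // compute VNumToSink, every PHI modelled so far must drop the incoming
+    // value contributed by the third block before the analysis continues.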
+    bool RecomputePHIContents = false;
+    if (ActivePreds.size() != InitialActivePredSize) {
+      ModelledPHISet NewNeededPHIs;
+      for (auto P : NeededPHIs) {
+        P.restrictToBlocks(ActivePreds);
+        NewNeededPHIs.insert(P);
+      }
+      NeededPHIs = NewNeededPHIs;
+      LRI.restrictToBlocks(ActivePreds);
+      RecomputePHIContents = true;
+    }
+
+    // The sunk instruction's results.
+    ModelledPHI NewPHI(NewInsts, ActivePreds);
+
+    // Does sinking this instruction render previous PHIs redundant?
+    if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
+      NeededPHIs.erase(NewPHI);
+      RecomputePHIContents = true;
+    }
+
+    if (RecomputePHIContents) {
+      // The needed PHIs have changed, so recompute the set of all needed
+      // values.
+      PHIContents.clear();
+      for (auto &PHI : NeededPHIs)
+        PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
+    }
+
+    // Is this instruction required by a later PHI that doesn't match this PHI?
+    // If so, we can't sink this instruction.
+    for (auto *V : NewPHI.getValues())
+      if (PHIContents.count(V))
+        // V exists in this PHI, but the whole PHI is different to NewPHI
+        // (else it would have been removed earlier). We cannot continue
+        // because this isn't representable.
+        return None;
+
+    // Which operands need PHIs?
+    // FIXME: If any of these fail, we should partition up the candidates to
+    // try and continue making progress.
+    Instruction *I0 = NewInsts[0];
+    for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
+      ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
+      if (PHI.areAllIncomingValuesSame())
+        continue;
+      if (NeededPHIs.count(PHI))
+        continue;
+      if (!canReplaceOperandWithVariable(I0, OpNum))
+        // We can't create a PHI from this instruction!
+        return None;
+      // Don't create indirect calls! The called value is the final operand.
+      if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1)
+        // FIXME: if the call was *already* indirect, we should do this.
+        return None;
+
+      NeededPHIs.insert(PHI);
+    }
+
+    if (isMemoryInst(NewInsts[0]))
+      ++MemoryInstNum;
+
+    SinkingInstructionCandidate Cand;
+    Cand.NumInstructions = ++InstNum;
+    Cand.NumMemoryInsts = MemoryInstNum;
+    Cand.NumBlocks = ActivePreds.size();
+    Cand.NumPHIs = NeededPHIs.size();
+    for (auto *C : ActivePreds)
+      Cand.Blocks.push_back(C);
+
+    return Cand;
+  }
+
+  // Create a ModelledPHI for each PHI in BB, adding to PHIs.
+  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
+                          std::set<Value *> &PHIContents) {
+    for (auto &I : *BB) {
+      auto *PN = dyn_cast<PHINode>(&I);
+      if (!PN)
+        return;
+
+      auto MPHI = ModelledPHI(PN);
+      PHIs.insert(MPHI);
+      for (auto *V : MPHI.getValues())
+        PHIContents.insert(V);
+    }
+  }
+
+  // The main instruction sinking driver. Set up state and try and sink
+  // instructions into BBEnd from its predecessors.
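+  // In outline (a summary of the code below): collect the candidate
+  // predecessors, walk their instructions in lockstep from the bottom using
+  // LockstepReverseIterator, record one SinkingInstructionCandidate per
+  // depth, then sink the instructions of the cheapest positive-cost
+  // candidate.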
+  std::pair<unsigned, bool> sinkBB(BasicBlock *BBEnd) {
+    DEBUG(dbgs() << "GVNSink: running on basic block ";
+          BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
+    SmallVector<BasicBlock *, 4> Preds;
+    for (auto *B : predecessors(BBEnd)) {
+      auto *T = B->getTerminator();
+      if (isa<BranchInst>(T) || isa<SwitchInst>(T))
+        Preds.push_back(B);
+      else
+        return {0, false};
+    }
+    if (Preds.size() < 2)
+      return {0, false};
+    std::stable_sort(Preds.begin(), Preds.end());
+
+    LockstepReverseIterator LRI(Preds);
+    SmallVector<SinkingInstructionCandidate, 4> Candidates;
+    unsigned InstNum = 0, MemoryInstNum = 0;
+    ModelledPHISet NeededPHIs;
+    std::set<Value *> PHIContents;
+    analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
+    unsigned NumOrigPHIs = NeededPHIs.size();
+
+    while (LRI.isValid()) {
+      auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
+                                               NeededPHIs, PHIContents);
+      if (!Cand)
+        break;
+      Cand->calculateCost(NumOrigPHIs, Preds.size());
+      Candidates.emplace_back(*Cand);
+      --LRI;
+    }
+
+    std::stable_sort(Candidates.begin(), Candidates.end());
+    std::reverse(Candidates.begin(), Candidates.end());
+    DEBUG(
+      dbgs() << " -- Sinking candidates:\n";
+      for (auto &C : Candidates)
+        dbgs() << "  " << C << "\n";
+    );
+
+    // Pick the top candidate, as long as its cost is positive!
+    if (Candidates.empty() || Candidates.front().Cost <= 0)
+      return {0, false};
+    auto C = Candidates.front();
+
+    DEBUG(dbgs() << " -- Sinking: " << C << "\n");
+    BasicBlock *InsertBB = BBEnd;
+    if (C.Blocks.size() < Preds.size()) {
+      DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs());
+            dbgs() << "\n");
+      InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
+      if (!InsertBB) {
+        DEBUG(dbgs() << " -- FAILED to split edge!\n");
+        // Edge couldn't be split.
+        return {0, false};
+      }
+      // FIXME: Make SplitBlockPredecessors update PDT too.
+      DT->recalculate(*BBEnd->getParent());
+      PDT->recalculate(*BBEnd->getParent());
+    }
+
+    for (unsigned I = 0; I < C.NumInstructions; ++I)
+      sinkLastInstruction(C.Blocks, InsertBB);
+
+    return {C.NumInstructions,
+            C.NumMemoryInsts > 0 || C.Blocks.size() < Preds.size()};
+  }
+
+  // Perform the actual mechanics of sinking an instruction from Blocks into
+  // BBEnd, which is their only successor.
+  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd) {
+    SmallVector<Instruction *, 4> Insts;
+    for (auto *BB : Blocks)
+      Insts.push_back(BB->getTerminator()->getPrevNode());
+    Instruction *I0 = Insts.front();
+
+    SmallVector<Value *, 4> NewOperands;
+    for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+      bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+        return I->getOperand(O) != I0->getOperand(O);
+      });
+      if (!NeedPHI) {
+        NewOperands.push_back(I0->getOperand(O));
+        continue;
+      }
+
+      // Create a new PHI in the successor block and populate it.
+      auto *Op = I0->getOperand(O);
+      assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+      auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+                                 Op->getName() + ".sink", &BBEnd->front());
+      for (auto *I : Insts)
+        PN->addIncoming(I->getOperand(O), I->getParent());
+      NewOperands.push_back(PN);
+    }
+
+    // FIXME: I'm certain this MemorySSA update code is wrong, but not sure
+    // how to fix it.
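+    // What the update below attempts: ensure BBEnd has a MemoryPhi merging
+    // each predecessor's memory state, point its incoming values at the
+    // defining accesses of the instructions being sunk, and remove those
+    // instructions' own accesses.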
+    if (isMemoryInst(I0)) {
+      MemoryPhi *PN = MSSA->getMemoryAccess(BBEnd);
+      MemoryPhi *CreatedPN = nullptr;
+      if (!PN)
+        PN = CreatedPN = MSSA->createMemoryPhi(BBEnd);
+      for (auto *I : Insts) {
+        auto *MA = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(I));
+        if (CreatedPN)
+          PN->addIncoming(MA->getDefiningAccess(), I->getParent());
+        else
+          PN->setIncomingValue(PN->getBasicBlockIndex(I->getParent()),
+                               MA->getDefiningAccess());
+        MSSA->removeMemoryAccess(MA);
+      }
+    }
+
+    // Arbitrarily use I0 as the new "common" instruction; remap its operands
+    // and move it to the start of the successor block.
+    for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+      I0->getOperandUse(O).set(NewOperands[O]);
+    I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+    // Update metadata and IR flags.
+    for (auto *I : Insts)
+      if (I != I0) {
+        combineMetadataForCSE(I0, I);
+        I0->andIRFlags(I);
+      }
+
+    if (isMemoryInst(I0)) {
+      auto *PN = MSSA->getMemoryAccess(BBEnd);
+      MSSA->createMemoryAccessInBB(I0, PN, BBEnd, MemorySSA::Beginning);
+    }
+
+    for (auto *I : Insts)
+      if (I != I0)
+        I->replaceAllUsesWith(I0);
+    foldPointlessPHINodes(BBEnd);
+
+    // Finally nuke all instructions apart from the common instruction.
+    for (auto *I : Insts)
+      if (I != I0)
+        I->eraseFromParent();
+    // FIXME: !
+    MSSA->invalidateAll();
+  }
+
+  // Remove PHIs that all have the same incoming value.
+  void foldPointlessPHINodes(BasicBlock *BB) {
+    auto I = BB->begin();
+    while (PHINode *PN = dyn_cast<PHINode>(&*I++)) {
+      if (!all_of(PN->incoming_values(), [&](const Value *V) {
+            return V == PN->getIncomingValue(0);
+          }))
+        continue;
+      if (PN->getIncomingValue(0) != PN)
+        PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      else
+        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+
+      MD->removeInstruction(PN); // Memdep updates AA itself.
+      PN->eraseFromParent();
+    }
+  }
+
+  // Sink all expressions. Return the number of instructions sunk, and whether
+  // a memory instruction was sunk or an edge was split.
+  std::pair<unsigned, bool> sinkExpressions() {
+    if (!PDT->getRootNode())
+      // This can happen if the function has no returns.
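+      // (For example a function whose body is an infinite loop; with no
+      // reachable exit there is no post-dominator root to walk from.)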
+      return {0, false};
+    for (auto &N : depth_first(PDT)) {
+      auto Res = sinkBB(N->getBlock());
+      if (Res.first)
+        return Res;
+    }
+    return {0, false};
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// Pass machinery / boilerplate
+
+class GVNSinkLegacyPass : public FunctionPass {
+public:
+  static char ID;
+
+  GVNSinkLegacyPass() : FunctionPass(ID) {
+    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+    auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+    auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+    GVNSink G(&DT, &PDT, &AA, &MD, &MSSA, &TTI, F.optForMinSize());
+    return G.run(F);
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MemoryDependenceWrapperPass>();
+    AU.addRequired<MemorySSAWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<MemorySSAWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+  }
+};
+} // namespace
+
+PreservedAnalyses GVNSinkPass::run(Function &F,
+                                   FunctionAnalysisManager &AM) {
+  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+  AliasAnalysis &AA = AM.getResult<AAManager>(F);
+  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
+  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  GVNSink G(&DT, &PDT, &AA, &MD, &MSSA, &TTI, F.optForMinSize());
+  if (!G.run(F))
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<MemorySSAAnalysis>();
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+
+char GVNSinkLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
+                      "Early GVN sinking of Expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
+                    "Early GVN sinking of Expressions", false, false)
+
+FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
Index: lib/Transforms/Scalar/Scalar.cpp
===================================================================
--- lib/Transforms/Scalar/Scalar.cpp
+++ lib/Transforms/Scalar/Scalar.cpp
@@ -46,6 +46,7 @@
   initializeEarlyCSELegacyPassPass(Registry);
   initializeEarlyCSEMemSSALegacyPassPass(Registry);
   initializeGVNHoistLegacyPassPass(Registry);
+  initializeGVNSinkLegacyPassPass(Registry);
   initializeFlattenCFGPassPass(Registry);
   initializeInductiveRangeCheckEliminationPass(Registry);
   initializeIndVarSimplifyLegacyPassPass(Registry);
Index: lib/Transforms/Utils/MemorySSA.cpp
===================================================================
--- lib/Transforms/Utils/MemorySSA.cpp
+++ lib/Transforms/Utils/MemorySSA.cpp
@@ -1232,6 +1232,22 @@
     MA.dropAllReferences();
 }
 
+void MemorySSA::invalidateAll() {
+  // Drop all our references.
+  for (const auto &Pair : PerBlockAccesses)
+    for (MemoryAccess &MA : *Pair.second)
+      MA.dropAllReferences();
+  LiveOnEntryDef = nullptr;
+  Walker = nullptr;
+  NextID = 0;
+  ValueToMemoryAccess.clear();
+  PerBlockAccesses.clear();
+  BlockNumberingValid.clear();
+  BlockNumbering.clear();
+
+  buildMemorySSA();
+}
+
 MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
   auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr));
Index: test/Transforms/GVNSink/sink-common-code.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVNSink/sink-common-code.ll
@@ -0,0 +1,694 @@
+; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test1
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+; CHECK-LABEL: test2
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp uge i32 %blksA, %add
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+declare i32 @foo(i32, i32) nounwind readnone
+
+define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test3
+; CHECK: select
+; CHECK: call
+; CHECK: call
+; CHECK: add
+; CHECK-NOT: br
+
+define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %a = add i32 %x, 5
+  store i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %b = add i32 %x, 7
+  store i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test4
+; CHECK: select
+; CHECK: store
+; CHECK-NOT: store
+
+define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %a = add i32 %x, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %b = add i32 %x, 7
+  store i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test5
+; CHECK: store volatile
+; CHECK: store
+
+define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %a = add i32 %x, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %b = add i32 %x, 7
+  store volatile i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test6
+; CHECK: select
+; CHECK: store volatile
+; CHECK-NOT: store
+
+define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %z = load volatile i32, i32* %y
+  %a = add i32 %z, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %w = load volatile i32, i32* %y
+  %b = add i32 %w, 7
+  store volatile i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test7
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The extra store in %if.then means %z and %w are not equivalent.
+define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  store i32 7, i32* %p
+  %z = load volatile i32, i32* %y
+  store i32 6, i32* %p
+  %a = add i32 %z, 5
+  store volatile i32 %a, i32* %y
+  br label %if.end
+
+if.else:
+  %w = load volatile i32, i32* %y
+  %b = add i32 %w, 7
+  store volatile i32 %b, i32* %y
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test9
+; CHECK: add
+; CHECK: add
+
+%struct.anon = type { i32, i32 }
+
+; The GEP indexes a struct type so cannot have a variable last index.
+define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %x, 5
+  %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+  store volatile i32 %x, i32* %gepa
+  br label %if.end
+
+if.else:
+  %dummy1 = add i32 %x, 6
+  %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  store volatile i32 %x, i32* %gepb
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+; CHECK-LABEL: test10
+; CHECK: getelementptr
+; CHECK: store volatile
+; CHECK: getelementptr
+; CHECK: store volatile
+
+; The shufflevector's mask operand cannot be merged in a PHI.
+define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %w, 5
+  %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 0, i32 1>
+  br label %if.end
+
+if.else:
+  %dummy1 = add i32 %w, 6
+  %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 1, i32 0>
+  br label %if.end
+
+if.end:
+  %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ]
+  ret i32 1
+}
+
+; CHECK-LABEL: test11
+; CHECK: shufflevector
+; CHECK: shufflevector
+
+; We can't common an intrinsic!
+define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %w, 5
+  %sv1 = call i32 @llvm.ctlz.i32(i32 %x)
+  br label %if.end
+
+if.else:
+  %dummy1 = add i32 %w, 6
+  %sv2 = call i32 @llvm.cttz.i32(i32 %x)
+  br label %if.end
+
+if.end:
+  %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+  ret i32 1
+}
+
+declare i32 @llvm.ctlz.i32(i32 %x) readnone
+declare i32 @llvm.cttz.i32(i32 %x) readnone
+
+; CHECK-LABEL: test12
+; CHECK: call i32 @llvm.ctlz
+; CHECK: call i32 @llvm.cttz
+
+; The TBAA metadata should be properly combined.
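+; (combineMetadataForCSE should widen the two distinct float subtypes to
+; their common ancestor type; the expected result is pinned down by the
+; metadata CHECK lines at the end of this file.)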
+define i32 @test13(i1 zeroext %flag, i32 %x, i32* %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %z = load volatile i32, i32* %y
+  %a = add i32 %z, 5
+  store volatile i32 %a, i32* %y, !tbaa !3
+  br label %if.end
+
+if.else:
+  %w = load volatile i32, i32* %y
+  %b = add i32 %w, 7
+  store volatile i32 %b, i32* %y, !tbaa !4
+  br label %if.end
+
+if.end:
+  ret i32 1
+}
+
+!0 = !{ !"an example type tree" }
+!1 = !{ !"int", !0 }
+!2 = !{ !"float", !0 }
+!3 = !{ !"const float", !2, i64 0 }
+!4 = !{ !"special float", !2, i64 1 }
+
+; CHECK-LABEL: test13
+; CHECK-DAG: select
+; CHECK-DAG: load volatile
+; CHECK: store volatile {{.*}}, !tbaa !0
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+; The call should be commoned.
+define i32 @test13a(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %sv1 = call i32 @bar(i32 %x)
+  br label %if.end
+
+if.else:
+  %sv2 = call i32 @bar(i32 %y)
+  br label %if.end
+
+if.end:
+  %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ]
+  ret i32 1
+}
+declare i32 @bar(i32)
+
+; CHECK-LABEL: test13a
+; CHECK: %[[x:.*]] = select i1 %flag
+; CHECK: call i32 @bar(i32 %[[x]])
+
+; The load should be commoned.
+define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %x, 1
+  %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  %sv1 = load i32, i32* %gepa
+  %cmp1 = icmp eq i32 %sv1, 56
+  br label %if.end
+
+if.else:
+  %dummy2 = add i32 %x, 4
+  %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  %sv2 = load i32, i32* %gepb
+  %cmp2 = icmp eq i32 %sv2, 57
+  br label %if.end
+
+if.end:
+  %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+  ret i32 1
+}
+
+; CHECK-LABEL: test14
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+; The load should be commoned.
+define i32 @test15(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %dummy = add i32 %x, 1
+  %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0
+  %sv1 = load i32, i32* %gepa
+  %ext1 = zext i32 %sv1 to i64
+  %cmp1 = icmp eq i64 %ext1, 56
+  br label %if.end
+
+if.else:
+  %dummy2 = add i32 %x, 4
+  %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
+  %sv2 = load i32, i32* %gepb
+  %ext2 = zext i32 %sv2 to i64
+  %cmp2 = icmp eq i64 %ext2, 56
+  br label %if.end
+
+if.end:
+  %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ]
+  ret i32 1
+}
+
+; CHECK-LABEL: test15
+; CHECK: getelementptr
+; CHECK: load
+; CHECK-NOT: load
+
+define zeroext i1 @test_crash(i1 zeroext %flag, i32* %i4, i32* %m, i32* %n) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %tmp1 = load i32, i32* %i4
+  %tmp2 = add i32 %tmp1, -1
+  store i32 %tmp2, i32* %i4
+  br label %if.end
+
+if.else:
+  %tmp3 = load i32, i32* %m
+  %tmp4 = load i32, i32* %n
+  %tmp5 = add i32 %tmp3, %tmp4
+  store i32 %tmp5, i32* %i4
+  br label %if.end
+
+if.end:
+  ret i1 true
+}
+
+; CHECK-LABEL: test_crash
+; No checks for test_crash - just ensure it doesn't crash!
+
+define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test16
+; CHECK: zext
+; CHECK: zext
+
+define zeroext i1 @test16a(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks, i8* %p) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  %b1 = sext i8 %frombool1 to i32
+  %b2 = trunc i32 %b1 to i8
+  store i8 %b2, i8* %p
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  %a1 = sext i8 %frombool3 to i32
+  %a2 = trunc i32 %a1 to i8
+  store i8 %a2, i8* %p
+  br label %if.end
+
+if.end:
+  ret i1 true
+}
+
+; CHECK-LABEL: test16a
+; CHECK: zext
+; CHECK-NOT: zext
+
+define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.end [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = call i8 @i1toi8(i1 %cmp)
+  %a1 = sext i8 %frombool1 to i32
+  %a2 = trunc i32 %a1 to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = call i8 @i1toi8(i1 %cmp2)
+  %b1 = sext i8 %frombool3 to i32
+  %b2 = trunc i32 %b1 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, %if.then2 ], [ 0, %entry ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+declare i8 @i1toi8(i1)
+
+; FIXME: DISABLED - we don't consider this profitable. We should
+;  - Consider argument setup/return mov'ing for calls, like InlineCost does.
+;  - Consider the removal of the %obeys.0 PHI (zero PHI movement overall)
+
+; DISABLED-CHECK-LABEL: test17
+; DISABLED-CHECK: if.then:
+; DISABLED-CHECK-NEXT: icmp uge
+; DISABLED-CHECK-NEXT: br label %[[x:.*]]
+
+; DISABLED-CHECK: if.then2:
+; DISABLED-CHECK-NEXT: add
+; DISABLED-CHECK-NEXT: icmp ule
+; DISABLED-CHECK-NEXT: br label %[[x]]
+
+; DISABLED-CHECK: [[x]]:
+; DISABLED-CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp
+; DISABLED-CHECK-NEXT: %[[z:.*]] = call i8 @i1toi8(i1 %[[y]])
+; DISABLED-CHECK-NEXT: br label %if.end
+
+; DISABLED-CHECK: if.end:
+; DISABLED-CHECK-NEXT: phi i8
+; DISABLED-CHECK-DAG: [ %[[z]], %[[x]] ]
+; DISABLED-CHECK-DAG: [ 0, %entry ]
+
+define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  switch i32 %flag, label %if.then3 [
+    i32 0, label %if.then
+    i32 1, label %if.then2
+  ]
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  br label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  br label %if.end
+
+if.then3:
+  %add2 = add i32 %nblks, %blksA
+  %cmp3 = icmp ule i32 %add2, %blksA
+  %frombool4 = zext i1 %cmp3 to i8
+  br label %if.end
+
+if.end:
+  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ]
+  %tobool4 = icmp ne i8 %obeys.0, 0
+  ret i1 %tobool4
+}
+
+; CHECK-LABEL: test18
+; CHECK: if.end:
+; CHECK-NEXT: %[[x:.*]] = phi i1
+; CHECK-DAG: [ %cmp, %if.then ]
+; CHECK-DAG: [ %cmp2, %if.then2 ]
+; CHECK-DAG: [ %cmp3, %if.then3 ]
+; CHECK-NEXT: zext i1 %[[x]] to i8
+
+; The phi is confusing - both add instructions are used by it, but
+; not on their respective unconditional arcs. It should not be
+; optimized.
+define void @test_pr30292(i1 %cond, i1 %cond2, i32 %a, i32 %b) {
+entry:
+  %add1 = add i32 %a, 1
+  br label %succ
+
+one:
+  br i1 %cond, label %two, label %succ
+
+two:
+  call void @g()
+  %add2 = add i32 %a, 1
+  br label %succ
+
+succ:
+  %p = phi i32 [ 0, %entry ], [ %add1, %one ], [ %add2, %two ]
+  br label %one
+}
+declare void @g()
+
+; CHECK-LABEL: test_pr30292
+; CHECK: phi i32 [ 0, %entry ], [ %add1, %succ ], [ %add2, %two ]
+
+define zeroext i1 @test_pr30244(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+  %p = alloca i8
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %cmp = icmp uge i32 %blksA, %nblks
+  %frombool1 = zext i1 %cmp to i8
+  store i8 %frombool1, i8* %p
+  br label %if.end
+
+if.else:
+  br i1 %flag2, label %if.then2, label %if.end
+
+if.then2:
+  %add = add i32 %nblks, %blksB
+  %cmp2 = icmp ule i32 %add, %blksA
+  %frombool3 = zext i1 %cmp2 to i8
+  store i8 %frombool3, i8* %p
+  br label %if.end
+
+if.end:
+  ret i1 true
+}
+
+; CHECK-LABEL: @test_pr30244
+; CHECK: store
+; CHECK: store
+
+define i32 @test_pr30373a(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  %z0 = lshr i32 %y0, 8
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  %z1 = lshr exact i32 %y1, 8
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373a
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+define i32 @test_pr30373b(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  %z0 = lshr exact i32 %y0, 8
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  %z1 = lshr i32 %y1, 8
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test_pr30373b
+; CHECK: lshr
+; CHECK-NOT: exact
+; CHECK: }
+
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
+; CHECK: !2 = !{!"an example type tree"}
Index: test/Transforms/SimplifyCFG/sink-common-code.ll
===================================================================
--- test/Transforms/SimplifyCFG/sink-common-code.ll
+++ test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -414,7 +414,7 @@
   %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1
   %sv2 = load i32, i32* %gepb
   %ext2 = zext i32 %sv2 to i64
-  %cmp2 = icmp eq i64 %ext2, 57
+  %cmp2 = icmp eq i64 %ext2, 56
   br label %if.end
 
 if.end:
@@ -488,7 +488,9 @@
 if.then:
   %cmp = icmp uge i32 %blksA, %nblks
   %frombool1 = zext i1 %cmp to i8
-  store i8 %frombool1, i8* %p
+  %b1 = sext i8 %frombool1 to i32
+  %b2 = trunc i32 %b1 to i8
+  store i8 %b2, i8* %p
   br label %if.end
 
 if.else:
@@ -498,7 +500,9 @@
   %add = add i32 %nblks, %blksB
   %cmp2 = icmp ule i32 %add, %blksA
   %frombool3 = zext i1 %cmp2 to i8
-  store i8 %frombool3, i8* %p
+  %a1 = sext i8 %frombool3 to i32
+  %a2 = trunc i32 %a1 to i8
+  store i8 %a2, i8* %p
   br label %if.end
 
 if.end:
@@ -519,16 +523,20 @@
 if.then:
   %cmp = icmp uge i32 %blksA, %nblks
   %frombool1 = call i8 @i1toi8(i1 %cmp)
+  %a1 = sext i8 %frombool1 to i32
+  %a2 = trunc i32 %a1 to i8
   br label %if.end
 
 if.then2:
   %add = add i32 %nblks, %blksB
   %cmp2 = icmp ule i32 %add, %blksA
   %frombool3 = call i8 @i1toi8(i1 %cmp2)
+  %b1 = sext i8 %frombool3 to i32
+  %b2 = trunc i32 %b1 to i8
   br label %if.end
 
 if.end:
-  %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %entry ]
+  %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, %if.then2 ], [ 0, %entry ]
   %tobool4 = icmp ne i8 %obeys.0, 0
   ret i1 %tobool4
 }