Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -144,6 +144,7 @@ void initializeGCModuleInfoPass(PassRegistry&); void initializeGCOVProfilerLegacyPassPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); +void initializeGVNSinkLegacyPassPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); void initializeGlobalDCELegacyPassPass(PassRegistry&); void initializeGlobalMergePass(PassRegistry&); Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -356,6 +356,13 @@ //===----------------------------------------------------------------------===// // +// GVNSink - This pass uses an "inverted" value numbering to decide the +// similarity of expressions and sinks similar expressions into successors. +// +FunctionPass *createGVNSinkPass(); + +//===----------------------------------------------------------------------===// +// // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads // are hoisted into the header, while stores sink into the footer. // Index: llvm/trunk/include/llvm/Transforms/Scalar/GVN.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/GVN.h +++ llvm/trunk/include/llvm/Transforms/Scalar/GVN.h @@ -238,7 +238,12 @@ /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; - +/// \brief Uses an "inverted" value numbering to decide the similarity of +/// expressions and sinks similar expressions into successors. +struct GVNSinkPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } #endif Index: llvm/trunk/include/llvm/Transforms/Utils/Local.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/Local.h +++ llvm/trunk/include/llvm/Transforms/Utils/Local.h @@ -410,6 +410,14 @@ void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI, const TargetLibraryInfo *TLI); +//===----------------------------------------------------------------------===// +// Transform predicates +// + +/// Given an instruction, is it legal to set operand OpIdx to a non-constant +/// value? +bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx); + } // End llvm namespace #endif Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -155,6 +155,10 @@ cl::Hidden, cl::desc("Enable the simple loop unswitch pass.")); +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = on)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -307,6 +311,11 @@ MPM.add(createEarlyCSEPass()); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. Index: llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/trunk/lib/Transforms/Scalar/CMakeLists.txt @@ -13,6 +13,7 @@ GuardWidening.cpp GVN.cpp GVNHoist.cpp + GVNSink.cpp IVUsersPrinter.cpp InductiveRangeCheckElimination.cpp IndVarSimplify.cpp Index: llvm/trunk/lib/Transforms/Scalar/GVNSink.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/GVNSink.cpp +++ llvm/trunk/lib/Transforms/Scalar/GVNSink.cpp @@ -0,0 +1,870 @@ +//===- GVNSink.cpp - sink expressions into successors -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file GVNSink.cpp +/// This pass attempts to sink instructions into successors, reducing static +/// instruction count and enabling if-conversion. +/// +/// We use a variant of global value numbering to decide what can be sunk. +/// Consider: +/// +/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] +/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] +/// \ / +/// [ %e = phi i32 %a2, %c2 ] +/// [ add i32 %e, 4 ] +/// +/// +/// GVN would number %a1 and %c1 differently because they compute different +/// results - the VN of an instruction is a function of its opcode and the +/// transitive closure of its operands. This is the key property for hoisting +/// and CSE. +/// +/// What we want when sinking however is for a numbering that is a function of +/// the *uses* of an instruction, which allows us to answer the question "if I +/// replace %a1 with %c1, will it contribute in an equivalent way to all +/// successive instructions?". The PostValueTable class in GVN provides this +/// mapping. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "gvn-sink" + +STATISTIC(NumRemoved, "Number of instructions removed"); + +namespace { + +static bool isMemoryInst(const Instruction *I) { + return isa(I) || isa(I) || + (isa(I) && !cast(I)->doesNotAccessMemory()) || + (isa(I) && !cast(I)->doesNotAccessMemory()); +} + +/// Iterates through instructions in a set of blocks in reverse order from the +/// first non-terminator. For example (assume all blocks have size n): +/// LockstepReverseIterator I([B1, B2, B3]); +/// *I-- = [B1[n], B2[n], B3[n]]; +/// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; +/// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; +/// ... +/// +/// It continues until all blocks have been exhausted. Use \c getActiveBlocks() +/// to +/// determine which blocks are still going and the order they appear in the +/// list returned by operator*. +class LockstepReverseIterator { + ArrayRef Blocks; + SmallPtrSet ActiveBlocks; + SmallVector Insts; + bool Fail; + +public: + LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + ActiveBlocks.clear(); + for (BasicBlock *BB : Blocks) + ActiveBlocks.insert(BB); + Insts.clear(); + for (BasicBlock *BB : Blocks) { + if (BB->size() <= 1) { + // Block wasn't big enough - only contained a terminator. + ActiveBlocks.erase(BB); + continue; + } + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + if (Insts.empty()) + Fail = true; + } + + bool isValid() const { return !Fail; } + ArrayRef operator*() const { return Insts; } + SmallPtrSet &getActiveBlocks() { return ActiveBlocks; } + + void restrictToBlocks(SmallPtrSetImpl &Blocks) { + for (auto II = Insts.begin(); II != Insts.end();) { + if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == + Blocks.end()) { + ActiveBlocks.erase((*II)->getParent()); + II = Insts.erase(II); + } else { + ++II; + } + } + } + + void operator--() { + if (Fail) + return; + SmallVector NewInsts; + for (auto *Inst : Insts) { + if (Inst == &Inst->getParent()->front()) + ActiveBlocks.erase(Inst->getParent()); + else + NewInsts.push_back(Inst->getPrevNode()); + } + if (NewInsts.empty()) { + Fail = true; + return; + } + Insts = NewInsts; + } +}; + +//===----------------------------------------------------------------------===// + +/// Candidate solution for sinking. There may be different ways to +/// sink instructions, differing in the number of instructions sunk, +/// the number of predecessors sunk from and the number of PHIs +/// required. +struct SinkingInstructionCandidate { + unsigned NumBlocks; + unsigned NumInstructions; + unsigned NumPHIs; + unsigned NumMemoryInsts; + int Cost = -1; + SmallVector Blocks; + + void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { + unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; + unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0; + Cost = (NumInstructions * (NumBlocks - 1)) - + (NumExtraPHIs * + NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. + - SplitEdgeCost; + } + bool operator>=(const SinkingInstructionCandidate &Other) const { + return Cost >= Other.Cost; + } +}; + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << ""; + return OS; +} + +//===----------------------------------------------------------------------===// + +/// Describes a PHI node that may or may not exist. These track the PHIs +/// that must be created if we sunk a sequence of instructions. It provides +/// a hash function for efficient equality comparisons. +class ModelledPHI { + SmallVector Values; + SmallVector Blocks; + +public: + ModelledPHI() {} + ModelledPHI(const PHINode *PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) + Blocks.push_back(PN->getIncomingBlock(I)); + std::sort(Blocks.begin(), Blocks.end()); + + // This assumes the PHI is already well-formed and there aren't conflicting + // incoming values for the same block. + for (auto *B : Blocks) + Values.push_back(PN->getIncomingValueForBlock(B)); + } + /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI + /// without the same ID. + /// \note This is specifically for DenseMapInfo - do not use this! + static ModelledPHI createDummy(unsigned ID) { + ModelledPHI M; + M.Values.push_back(reinterpret_cast(ID)); + return M; + } + + /// Create a PHI from an array of incoming values and incoming blocks. + template + ModelledPHI(const VArray &V, const BArray &B) { + std::copy(V.begin(), V.end(), std::back_inserter(Values)); + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + } + + /// Create a PHI from [I[OpNum] for I in Insts]. + template + ModelledPHI(ArrayRef Insts, unsigned OpNum, const BArray &B) { + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + for (auto *I : Insts) + Values.push_back(I->getOperand(OpNum)); + } + + /// Restrict the PHI's contents down to only \c NewBlocks. + /// \c NewBlocks must be a subset of \c this->Blocks. + void restrictToBlocks(const SmallPtrSetImpl &NewBlocks) { + auto BI = Blocks.begin(); + auto VI = Values.begin(); + while (BI != Blocks.end()) { + assert(VI != Values.end()); + if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) == + NewBlocks.end()) { + BI = Blocks.erase(BI); + VI = Values.erase(VI); + } else { + ++BI; + ++VI; + } + } + assert(Blocks.size() == NewBlocks.size()); + } + + ArrayRef getValues() const { return Values; } + + bool areAllIncomingValuesSame() const { + return all_of(Values, [&](Value *V) { return V == Values[0]; }); + } + bool areAllIncomingValuesSameType() const { + return all_of( + Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); + } + bool areAnyIncomingValuesConstant() const { + return any_of(Values, [&](Value *V) { return isa(V); }); + } + // Hash functor + unsigned hash() const { + return (unsigned)hash_combine_range(Values.begin(), Values.end()); + } + bool operator==(const ModelledPHI &Other) const { + return Values == Other.Values && Blocks == Other.Blocks; + } +}; + +template struct DenseMapInfo { + static inline ModelledPHI &getEmptyKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(0); + return Dummy; + } + static inline ModelledPHI &getTombstoneKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(1); + return Dummy; + } + static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } + static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { + return LHS == RHS; + } +}; + +typedef DenseSet> ModelledPHISet; + +//===----------------------------------------------------------------------===// +// ValueTable +//===----------------------------------------------------------------------===// +// This is a value number table where the value number is a function of the +// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know +// that the program would be equivalent if we replaced A with PHI(A, B). +//===----------------------------------------------------------------------===// + +/// A GVN expression describing how an instruction is used. The operands +/// field of BasicExpression is used to store uses, not operands. +/// +/// This class also contains fields for discriminators used when determining +/// equivalence of instructions with sideeffects. +class InstructionUseExpr : public GVNExpression::BasicExpression { + unsigned MemoryUseOrder = -1; + bool Volatile = false; + +public: + InstructionUseExpr(Instruction *I, ArrayRecycler &R, + BumpPtrAllocator &A) + : GVNExpression::BasicExpression(I->getNumUses()) { + allocateOperands(R, A); + setOpcode(I->getOpcode()); + setType(I->getType()); + + for (auto &U : I->uses()) + op_push_back(U.getUser()); + std::sort(op_begin(), op_end()); + } + void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } + void setVolatile(bool V) { Volatile = V; } + + virtual hash_code getHashValue() const { + return hash_combine(GVNExpression::BasicExpression::getHashValue(), + MemoryUseOrder, Volatile); + } + + template hash_code getHashValue(Function MapFn) { + hash_code H = + hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); + for (auto *V : operands()) + H = hash_combine(H, MapFn(V)); + return H; + } +}; + +class ValueTable { + DenseMap ValueNumbering; + DenseMap ExpressionNumbering; + DenseMap HashNumbering; + BumpPtrAllocator Allocator; + ArrayRecycler Recycler; + uint32_t nextValueNumber; + + /// Create an expression for I based on its opcode and its uses. If I + /// touches or reads memory, the expression is also based upon its memory + /// order - see \c getMemoryUseOrder(). + InstructionUseExpr *createExpr(Instruction *I) { + InstructionUseExpr *E = + new (Allocator) InstructionUseExpr(I, Recycler, Allocator); + if (isMemoryInst(I)) + E->setMemoryUseOrder(getMemoryUseOrder(I)); + + if (CmpInst *C = dyn_cast(I)) { + CmpInst::Predicate Predicate = C->getPredicate(); + E->setOpcode((C->getOpcode() << 8) | Predicate); + } + return E; + } + + /// Helper to compute the value number for a memory instruction + /// (LoadInst/StoreInst), including checking the memory ordering and + /// volatility. + template InstructionUseExpr *createMemoryExpr(Inst *I) { + if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic()) + return nullptr; + InstructionUseExpr *E = createExpr(I); + E->setVolatile(I->isVolatile()); + return E; + } + +public: + /// Returns the value number for the specified value, assigning + /// it a new number if it did not have one before. + uint32_t lookupOrAdd(Value *V) { + auto VI = ValueNumbering.find(V); + if (VI != ValueNumbering.end()) + return VI->second; + + if (!isa(V)) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + Instruction *I = cast(V); + InstructionUseExpr *exp = nullptr; + switch (I->getOpcode()) { + case Instruction::Load: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Store: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Call: + case Instruction::Invoke: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = createExpr(I); + break; + default: + break; + } + + if (!exp) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t e = ExpressionNumbering[exp]; + if (!e) { + hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); + auto I = HashNumbering.find(H); + if (I != HashNumbering.end()) { + e = I->second; + } else { + e = nextValueNumber++; + HashNumbering[H] = e; + ExpressionNumbering[exp] = e; + } + } + ValueNumbering[V] = e; + return e; + } + + /// Returns the value number of the specified value. Fails if the value has + /// not yet been numbered. + uint32_t lookup(Value *V) const { + auto VI = ValueNumbering.find(V); + assert(VI != ValueNumbering.end() && "Value not numbered?"); + return VI->second; + } + + /// Removes all value numberings and resets the value table. + void clear() { + ValueNumbering.clear(); + ExpressionNumbering.clear(); + HashNumbering.clear(); + Recycler.clear(Allocator); + nextValueNumber = 1; + } + + ValueTable() : nextValueNumber(1) {} + + /// \c Inst uses or touches memory. Return an ID describing the memory state + /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), + /// the exact same memory operations happen after I1 and I2. + /// + /// This is a very hard problem in general, so we use domain-specific + /// knowledge that we only ever check for equivalence between blocks sharing a + /// single immediate successor that is common, and when determining if I1 == + /// I2 we will have already determined that next(I1) == next(I2). This + /// inductive property allows us to simply return the value number of the next + /// instruction that defines memory. + uint32_t getMemoryUseOrder(Instruction *Inst) { + auto *BB = Inst->getParent(); + for (auto I = std::next(Inst->getIterator()), E = BB->end(); + I != E && !I->isTerminator(); ++I) { + if (!isMemoryInst(&*I)) + continue; + if (isa(&*I)) + continue; + CallInst *CI = dyn_cast(&*I); + if (CI && CI->onlyReadsMemory()) + continue; + InvokeInst *II = dyn_cast(&*I); + if (II && II->onlyReadsMemory()) + continue; + return lookupOrAdd(&*I); + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// + +class GVNSink { +public: + GVNSink() : VN() {} + bool run(Function &F) { + DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); + + unsigned NumSunk = 0; + ReversePostOrderTraversal RPOT(&F); + for (auto *N : RPOT) + NumSunk += sinkBB(N); + + return NumSunk > 0; + } + +private: + ValueTable VN; + + bool isInstructionBlacklisted(Instruction *I) { + // These instructions may change or break semantics if moved. + if (isa(I) || I->isEHPad() || isa(I) || + I->getType()->isTokenTy()) + return true; + return false; + } + + /// The main heuristic function. Analyze the set of instructions pointed to by + /// LRI and return a candidate solution if these instructions can be sunk, or + /// None otherwise. + Optional analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents); + + /// Create a ModelledPHI for each PHI in BB, adding to PHIs. + void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, + SmallPtrSetImpl &PHIContents) { + for (auto &I : *BB) { + auto *PN = dyn_cast(&I); + if (!PN) + return; + + auto MPHI = ModelledPHI(PN); + PHIs.insert(MPHI); + for (auto *V : MPHI.getValues()) + PHIContents.insert(V); + } + } + + /// The main instruction sinking driver. Set up state and try and sink + /// instructions into BBEnd from its predecessors. + unsigned sinkBB(BasicBlock *BBEnd); + + /// Perform the actual mechanics of sinking an instruction from Blocks into + /// BBEnd, which is their only successor. + void sinkLastInstruction(ArrayRef Blocks, BasicBlock *BBEnd); + + /// Remove PHIs that all have the same incoming value. + void foldPointlessPHINodes(BasicBlock *BB) { + auto I = BB->begin(); + while (PHINode *PN = dyn_cast(I++)) { + if (!all_of(PN->incoming_values(), + [&](const Value *V) { return V == PN->getIncomingValue(0); })) + continue; + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } + } +}; + +Optional GVNSink::analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents) { + auto Insts = *LRI; + DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I + : Insts) { + I->dump(); + } dbgs() << " ]\n";); + + DenseMap VNums; + for (auto *I : Insts) { + uint32_t N = VN.lookupOrAdd(I); + DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + if (N == ~0U) + return None; + VNums[N]++; + } + unsigned VNumToSink = + std::max_element(VNums.begin(), VNums.end(), + [](const std::pair &I, + const std::pair &J) { + return I.second < J.second; + }) + ->first; + + if (VNums[VNumToSink] == 1) + // Can't sink anything! + return None; + + // Now restrict the number of incoming blocks down to only those with + // VNumToSink. + auto &ActivePreds = LRI.getActiveBlocks(); + unsigned InitialActivePredSize = ActivePreds.size(); + SmallVector NewInsts; + for (auto *I : Insts) { + if (VN.lookup(I) != VNumToSink) + ActivePreds.erase(I->getParent()); + else + NewInsts.push_back(I); + } + for (auto *I : NewInsts) + if (isInstructionBlacklisted(I)) + return None; + + // If we've restricted the incoming blocks, restrict all needed PHIs also + // to that set. + bool RecomputePHIContents = false; + if (ActivePreds.size() != InitialActivePredSize) { + ModelledPHISet NewNeededPHIs; + for (auto P : NeededPHIs) { + P.restrictToBlocks(ActivePreds); + NewNeededPHIs.insert(P); + } + NeededPHIs = NewNeededPHIs; + LRI.restrictToBlocks(ActivePreds); + RecomputePHIContents = true; + } + + // The sunk instruction's results. + ModelledPHI NewPHI(NewInsts, ActivePreds); + + // Does sinking this instruction render previous PHIs redundant? + if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) { + NeededPHIs.erase(NewPHI); + RecomputePHIContents = true; + } + + if (RecomputePHIContents) { + // The needed PHIs have changed, so recompute the set of all needed + // values. + PHIContents.clear(); + for (auto &PHI : NeededPHIs) + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + // Is this instruction required by a later PHI that doesn't match this PHI? + // if so, we can't sink this instruction. + for (auto *V : NewPHI.getValues()) + if (PHIContents.count(V)) + // V exists in this PHI, but the whole PHI is different to NewPHI + // (else it would have been removed earlier). We cannot continue + // because this isn't representable. + return None; + + // Which operands need PHIs? + // FIXME: If any of these fail, we should partition up the candidates to + // try and continue making progress. + Instruction *I0 = NewInsts[0]; + for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) { + ModelledPHI PHI(NewInsts, OpNum, ActivePreds); + if (PHI.areAllIncomingValuesSame()) + continue; + if (!canReplaceOperandWithVariable(I0, OpNum)) + // We can 't create a PHI from this instruction! + return None; + if (NeededPHIs.count(PHI)) + continue; + if (!PHI.areAllIncomingValuesSameType()) + return None; + // Don't create indirect calls! The called value is the final operand. + if ((isa(I0) || isa(I0)) && OpNum == E - 1 && + PHI.areAnyIncomingValuesConstant()) + return None; + + NeededPHIs.reserve(NeededPHIs.size()); + NeededPHIs.insert(PHI); + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + if (isMemoryInst(NewInsts[0])) + ++MemoryInstNum; + + SinkingInstructionCandidate Cand; + Cand.NumInstructions = ++InstNum; + Cand.NumMemoryInsts = MemoryInstNum; + Cand.NumBlocks = ActivePreds.size(); + Cand.NumPHIs = NeededPHIs.size(); + for (auto *C : ActivePreds) + Cand.Blocks.push_back(C); + + return Cand; +} + +unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { + DEBUG(dbgs() << "GVNSink: running on basic block "; + BBEnd->printAsOperand(dbgs()); dbgs() << "\n"); + SmallVector Preds; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa(T) || isa(T)) + Preds.push_back(B); + else + return 0; + } + if (Preds.size() < 2) + return 0; + std::sort(Preds.begin(), Preds.end()); + + unsigned NumOrigPreds = Preds.size(); + // We can only sink instructions through unconditional branches. + for (auto I = Preds.begin(); I != Preds.end();) { + if ((*I)->getTerminator()->getNumSuccessors() != 1) + I = Preds.erase(I); + else + ++I; + } + + LockstepReverseIterator LRI(Preds); + SmallVector Candidates; + unsigned InstNum = 0, MemoryInstNum = 0; + ModelledPHISet NeededPHIs; + SmallPtrSet PHIContents; + analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents); + unsigned NumOrigPHIs = NeededPHIs.size(); + + while (LRI.isValid()) { + auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum, + NeededPHIs, PHIContents); + if (!Cand) + break; + Cand->calculateCost(NumOrigPHIs, Preds.size()); + Candidates.emplace_back(*Cand); + --LRI; + } + + std::stable_sort( + Candidates.begin(), Candidates.end(), + [](const SinkingInstructionCandidate &A, + const SinkingInstructionCandidate &B) { return A >= B; }); + DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C + : Candidates) dbgs() + << " " << C << "\n";); + + // Pick the top candidate, as long it is positive! + if (Candidates.empty() || Candidates.front().Cost <= 0) + return 0; + auto C = Candidates.front(); + + DEBUG(dbgs() << " -- Sinking: " << C << "\n"); + BasicBlock *InsertBB = BBEnd; + if (C.Blocks.size() < NumOrigPreds) { + DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs()); + dbgs() << "\n"); + InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split"); + if (!InsertBB) { + DEBUG(dbgs() << " -- FAILED to split edge!\n"); + // Edge couldn't be split. + return 0; + } + } + + for (unsigned I = 0; I < C.NumInstructions; ++I) + sinkLastInstruction(C.Blocks, InsertBB); + + return C.NumInstructions; +} + +void GVNSink::sinkLastInstruction(ArrayRef Blocks, + BasicBlock *BBEnd) { + SmallVector Insts; + for (BasicBlock *BB : Blocks) + Insts.push_back(BB->getTerminator()->getPrevNode()); + Instruction *I0 = Insts.front(); + + SmallVector NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + // Update metadata and IR flags. + for (auto *I : Insts) + if (I != I0) { + combineMetadataForCSE(I0, I); + I0->andIRFlags(I); + } + + for (auto *I : Insts) + if (I != I0) + I->replaceAllUsesWith(I0); + foldPointlessPHINodes(BBEnd); + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); + + NumRemoved += Insts.size() - 1; +} + +//////////////////////////////////////////////////////////////////////////////// +// Pass machinery / boilerplate + +class GVNSinkLegacyPass : public FunctionPass { +public: + static char ID; + + GVNSinkLegacyPass() : FunctionPass(ID) { + initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + GVNSink G; + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + } +}; +} // namespace + +PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { + GVNSink G; + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +char GVNSinkLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) + +FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } Index: llvm/trunk/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Scalar.cpp +++ llvm/trunk/lib/Transforms/Scalar/Scalar.cpp @@ -48,6 +48,7 @@ initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); + initializeGVNSinkLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); Index: llvm/trunk/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/Local.cpp +++ llvm/trunk/lib/Transforms/Utils/Local.cpp @@ -2109,3 +2109,48 @@ !F->doesNotAccessMemory()) CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin); } + +bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { + // We can't have a PHI with a metadata type. + if (I->getOperand(OpIdx)->getType()->isMetadataTy()) + return false; + + // Early exit. + if (!isa(I->getOperand(OpIdx))) + return true; + + switch (I->getOpcode()) { + default: + return true; + case Instruction::Call: + case Instruction::Invoke: + // Many arithmetic intrinsics have no issue taking a + // variable, however it's hard to distingish these from + // specials such as @llvm.frameaddress that require a constant. + if (isa(I)) + return false; + + // Constant bundle operands may need to retain their constant-ness for + // correctness. + if (ImmutableCallSite(I).isBundleOperand(OpIdx)) + return false; + return true; + case Instruction::ShuffleVector: + // Shufflevector masks are constant. + return OpIdx != 2; + case Instruction::ExtractValue: + case Instruction::InsertValue: + // All operands apart from the first are constant. + return OpIdx == 0; + case Instruction::Alloca: + return false; + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = gep_type_begin(I); + for (auto E = std::next(It, OpIdx); It != E; ++It) + if (It.isStruct()) + return false; + return true; + } +} Index: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1376,53 +1376,6 @@ return true; } -// Is it legal to place a variable in operand \c OpIdx of \c I? -// FIXME: This should be promoted to Instruction. -static bool canReplaceOperandWithVariable(const Instruction *I, - unsigned OpIdx) { - // We can't have a PHI with a metadata type. - if (I->getOperand(OpIdx)->getType()->isMetadataTy()) - return false; - - // Early exit. - if (!isa(I->getOperand(OpIdx))) - return true; - - switch (I->getOpcode()) { - default: - return true; - case Instruction::Call: - case Instruction::Invoke: - // FIXME: many arithmetic intrinsics have no issue taking a - // variable, however it's hard to distingish these from - // specials such as @llvm.frameaddress that require a constant. - if (isa(I)) - return false; - - // Constant bundle operands may need to retain their constant-ness for - // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) - return false; - - return true; - - case Instruction::ShuffleVector: - // Shufflevector masks are constant. - return OpIdx != 2; - case Instruction::ExtractValue: - case Instruction::InsertValue: - // All operands apart from the first are constant. - return OpIdx == 0; - case Instruction::Alloca: - return false; - case Instruction::GetElementPtr: - if (OpIdx == 0) - return true; - gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1); - return It.isSequential(); - } -} - // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common Index: llvm/trunk/test/Transforms/GVNSink/dither.ll =================================================================== --- llvm/trunk/test/Transforms/GVNSink/dither.ll +++ llvm/trunk/test/Transforms/GVNSink/dither.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -S -gvn-sink | FileCheck %s + +; Because %tmp17 has flipped operands to its equivalents %tmp14 and %tmp7, we +; can't sink the zext as we'd need a shuffling PHI in between. +; +; Just sinking the zext isn't profitable, so ensure nothing is sunk. + +; CHECK-LABEL: @hoge +; CHECK-NOT: bb18.gvnsink.split +define void @hoge() { +bb: + br i1 undef, label %bb4, label %bb11 + +bb4: ; preds = %bb3 + br i1 undef, label %bb6, label %bb8 + +bb6: ; preds = %bb5 + %tmp = zext i16 undef to i64 + %tmp7 = add i64 %tmp, undef + br label %bb18 + +bb8: ; preds = %bb5 + %tmp9 = zext i16 undef to i64 + br label %bb18 + +bb11: ; preds = %bb10 + br i1 undef, label %bb12, label %bb15 + +bb12: ; preds = %bb11 + %tmp13 = zext i16 undef to i64 + %tmp14 = add i64 %tmp13, undef + br label %bb18 + +bb15: ; preds = %bb11 + %tmp16 = zext i16 undef to i64 + %tmp17 = add i64 undef, %tmp16 + br label %bb18 + +bb18: ; preds = %bb15, %bb12, %bb8, %bb6 + %tmp19 = phi i64 [ %tmp7, %bb6 ], [ undef, %bb8 ], [ %tmp14, %bb12 ], [ %tmp17, %bb15 ] + unreachable +} Index: llvm/trunk/test/Transforms/GVNSink/indirect-call.ll =================================================================== --- llvm/trunk/test/Transforms/GVNSink/indirect-call.ll +++ llvm/trunk/test/Transforms/GVNSink/indirect-call.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s + +declare i8 @ext(i1) + +define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test1 +; CHECK: call i8 @ext +; CHECK: call i8 %ext +if.then: + %frombool1 = call i8 @ext(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 %ext(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test2 +; CHECK: call i8 %ext +; CHECK-NOT: call +if.then: + %frombool1 = call i8 %ext(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 %ext(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +define zeroext i1 @test3(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks, i8(i1)* %ext1, i8(i1)* %ext2) { +entry: + %cmp = icmp uge i32 %blksA, %nblks + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test3 +; CHECK: %[[x:.*]] = select i1 %flag, i8 (i1)* %ext1, i8 (i1)* %ext2 +; CHECK: call i8 %[[x]](i1 %cmp) +; CHECK-NOT: call +if.then: + %frombool1 = call i8 %ext1(i1 %cmp) + br label %if.end + +if.else: + %frombool3 = call i8 %ext2(i1 %cmp) + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} Index: llvm/trunk/test/Transforms/GVNSink/sink-common-code.ll =================================================================== --- llvm/trunk/test/Transforms/GVNSink/sink-common-code.ll +++ llvm/trunk/test/Transforms/GVNSink/sink-common-code.ll @@ -0,0 +1,694 @@ +; RUN: opt < %s -gvn-sink -simplifycfg -simplifycfg-sink-common=false -S | FileCheck %s + +define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test1 +; CHECK: add +; CHECK: select +; CHECK: icmp +; CHECK-NOT: br +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + br i1 %flag, label %if.then, label %if.else + +; CHECK-LABEL: test2 +; CHECK: add +; CHECK: select +; CHECK: icmp +; CHECK-NOT: br +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + %add = add i32 %nblks, %blksB + %cmp2 = icmp uge i32 %blksA, %add + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +declare i32 @foo(i32, i32) nounwind readnone + +define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone + %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone + br label %if.end + +if.else: + %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone + %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone + br label %if.end + +if.end: + %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ] + %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ] + %ret = add i32 %xx, %yy + ret i32 %ret +} + +; CHECK-LABEL: test3 +; CHECK: select +; CHECK: call +; CHECK: call +; CHECK: add +; CHECK-NOT: br + +define i32 @test4(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test4 +; CHECK: select +; CHECK: store +; CHECK-NOT: store + +define i32 @test5(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test5 +; CHECK: store volatile +; CHECK: store + +define i32 @test6(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %a = add i32 %x, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %b = add i32 %x, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test6 +; CHECK: select +; CHECK: store volatile +; CHECK-NOT: store + +define i32 @test7(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %z = load volatile i32, i32* %y + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test7 +; CHECK-DAG: select +; CHECK-DAG: load volatile +; CHECK: store volatile +; CHECK-NOT: load +; CHECK-NOT: store + +; The extra store in %if.then means %z and %w are not equivalent. +define i32 @test9(i1 zeroext %flag, i32 %x, i32* %y, i32* %p) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + store i32 7, i32* %p + %z = load volatile i32, i32* %y + store i32 6, i32* %p + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test9 +; CHECK: add +; CHECK: add + +%struct.anon = type { i32, i32 } + +; The GEP indexes a struct type so cannot have a variable last index. +define i32 @test10(i1 zeroext %flag, i32 %x, i32* %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 5 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 + store volatile i32 %x, i32* %gepa + br label %if.end + +if.else: + %dummy1 = add i32 %x, 6 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + store volatile i32 %x, i32* %gepb + br label %if.end + +if.end: + ret i32 1 +} + +; CHECK-LABEL: test10 +; CHECK: getelementptr +; CHECK: store volatile +; CHECK: getelementptr +; CHECK: store volatile + +; The shufflevector's mask operand cannot be merged in a PHI. +define i32 @test11(i1 zeroext %flag, i32 %w, <2 x i32> %x, <2 x i32> %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %w, 5 + %sv1 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> + br label %if.end + +if.else: + %dummy1 = add i32 %w, 6 + %sv2 = shufflevector <2 x i32> %x, <2 x i32> %y, <2 x i32> + br label %if.end + +if.end: + %p = phi <2 x i32> [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test11 +; CHECK: shufflevector +; CHECK: shufflevector + +; We can't common an intrinsic! +define i32 @test12(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %w, 5 + %sv1 = call i32 @llvm.ctlz.i32(i32 %x) + br label %if.end + +if.else: + %dummy1 = add i32 %w, 6 + %sv2 = call i32 @llvm.cttz.i32(i32 %x) + br label %if.end + +if.end: + %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} + +declare i32 @llvm.ctlz.i32(i32 %x) readnone +declare i32 @llvm.cttz.i32(i32 %x) readnone + +; CHECK-LABEL: test12 +; CHECK: call i32 @llvm.ctlz +; CHECK: call i32 @llvm.cttz + +; The TBAA metadata should be properly combined. +define i32 @test13(i1 zeroext %flag, i32 %x, i32* %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %z = load volatile i32, i32* %y + %a = add i32 %z, 5 + store volatile i32 %a, i32* %y, !tbaa !3 + br label %if.end + +if.else: + %w = load volatile i32, i32* %y + %b = add i32 %w, 7 + store volatile i32 %b, i32* %y, !tbaa !4 + br label %if.end + +if.end: + ret i32 1 +} + +!0 = !{ !"an example type tree" } +!1 = !{ !"int", !0 } +!2 = !{ !"float", !0 } +!3 = !{ !"const float", !2, i64 0 } +!4 = !{ !"special float", !2, i64 1 } + +; CHECK-LABEL: test13 +; CHECK-DAG: select +; CHECK-DAG: load volatile +; CHECK: store volatile {{.*}}, !tbaa !0 +; CHECK-NOT: load +; CHECK-NOT: store + +; The call should be commoned. +define i32 @test13a(i1 zeroext %flag, i32 %w, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %sv1 = call i32 @bar(i32 %x) + br label %if.end + +if.else: + %sv2 = call i32 @bar(i32 %y) + br label %if.end + +if.end: + %p = phi i32 [ %sv1, %if.then ], [ %sv2, %if.else ] + ret i32 1 +} +declare i32 @bar(i32) + +; CHECK-LABEL: test13a +; CHECK: %[[x:.*]] = select i1 %flag +; CHECK: call i32 @bar(i32 %[[x]]) + +; The load should be commoned. +define i32 @test14(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 1 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + %sv1 = load i32, i32* %gepa + %cmp1 = icmp eq i32 %sv1, 56 + br label %if.end + +if.else: + %dummy2 = add i32 %x, 4 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + %sv2 = load i32, i32* %gepb + %cmp2 = icmp eq i32 %sv2, 57 + br label %if.end + +if.end: + %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test14 +; CHECK: getelementptr +; CHECK: load +; CHECK-NOT: load + +; The load should be commoned. +define i32 @test15(i1 zeroext %flag, i32 %w, i32 %x, i32 %y, %struct.anon* %s) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %dummy = add i32 %x, 1 + %gepa = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 + %sv1 = load i32, i32* %gepa + %ext1 = zext i32 %sv1 to i64 + %cmp1 = icmp eq i64 %ext1, 56 + br label %if.end + +if.else: + %dummy2 = add i32 %x, 4 + %gepb = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 1 + %sv2 = load i32, i32* %gepb + %ext2 = zext i32 %sv2 to i64 + %cmp2 = icmp eq i64 %ext2, 56 + br label %if.end + +if.end: + %p = phi i1 [ %cmp1, %if.then ], [ %cmp2, %if.else ] + ret i32 1 +} + +; CHECK-LABEL: test15 +; CHECK: getelementptr +; CHECK: load +; CHECK-NOT: load + +define zeroext i1 @test_crash(i1 zeroext %flag, i32* %i4, i32* %m, i32* %n) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %tmp1 = load i32, i32* %i4 + %tmp2 = add i32 %tmp1, -1 + store i32 %tmp2, i32* %i4 + br label %if.end + +if.else: + %tmp3 = load i32, i32* %m + %tmp4 = load i32, i32* %n + %tmp5 = add i32 %tmp3, %tmp4 + store i32 %tmp5, i32* %i4 + br label %if.end + +if.end: + ret i1 true +} + +; CHECK-LABEL: test_crash +; No checks for test_crash - just ensure it doesn't crash! + +define zeroext i1 @test16(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) { + +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ 0, %if.else ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test16 +; CHECK: zext +; CHECK: zext + +define zeroext i1 @test16a(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks, i8* %p) { + +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + %b1 = sext i8 %frombool1 to i32 + %b2 = trunc i32 %b1 to i8 + store i8 %b2, i8* %p + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + %a1 = sext i8 %frombool3 to i32 + %a2 = trunc i32 %a1 to i8 + store i8 %a2, i8* %p + br label %if.end + +if.end: + ret i1 true +} + +; CHECK-LABEL: test16a +; CHECK: zext +; CHECK-NOT: zext + +define zeroext i1 @test17(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = call i8 @i1toi8(i1 %cmp) + %a1 = sext i8 %frombool1 to i32 + %a2 = trunc i32 %a1 to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = call i8 @i1toi8(i1 %cmp2) + %b1 = sext i8 %frombool3 to i32 + %b2 = trunc i32 %b1 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %a2, %if.then ], [ %b2, %if.then2 ], [ 0, %entry ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} +declare i8 @i1toi8(i1) + +; FIXME: DISABLED - we don't consider this profitable. We should +; - Consider argument setup/return mov'ing for calls, like InlineCost does. +; - Consider the removal of the %obeys.0 PHI (zero PHI movement overall) + +; DISABLED-CHECK-LABEL: test17 +; DISABLED-CHECK: if.then: +; DISABLED-CHECK-NEXT: icmp uge +; DISABLED-CHECK-NEXT: br label %[[x:.*]] + +; DISABLED-CHECK: if.then2: +; DISABLED-CHECK-NEXT: add +; DISABLED-CHECK-NEXT: icmp ule +; DISABLED-CHECK-NEXT: br label %[[x]] + +; DISABLED-CHECK: [[x]]: +; DISABLED-CHECK-NEXT: %[[y:.*]] = phi i1 [ %cmp +; DISABLED-CHECK-NEXT: %[[z:.*]] = call i8 @i1toi8(i1 %[[y]]) +; DISABLED-CHECK-NEXT: br label %if.end + +; DISABLED-CHECK: if.end: +; DISABLED-CHECK-NEXT: phi i8 +; DISABLED-CHECK-DAG: [ %[[z]], %[[x]] ] +; DISABLED-CHECK-DAG: [ 0, %entry ] + +define zeroext i1 @test18(i32 %flag, i32 %blksA, i32 %blksB, i32 %nblks) { +entry: + switch i32 %flag, label %if.then3 [ + i32 0, label %if.then + i32 1, label %if.then2 + ] + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + br label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + br label %if.end + +if.then3: + %add2 = add i32 %nblks, %blksA + %cmp3 = icmp ule i32 %add2, %blksA + %frombool4 = zext i1 %cmp3 to i8 + br label %if.end + +if.end: + %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.then2 ], [ %frombool4, %if.then3 ] + %tobool4 = icmp ne i8 %obeys.0, 0 + ret i1 %tobool4 +} + +; CHECK-LABEL: test18 +; CHECK: if.end: +; CHECK-NEXT: %[[x:.*]] = phi i1 +; CHECK-DAG: [ %cmp, %if.then ] +; CHECK-DAG: [ %cmp2, %if.then2 ] +; CHECK-DAG: [ %cmp3, %if.then3 ] +; CHECK-NEXT: zext i1 %[[x]] to i8 + +; The phi is confusing - both add instructions are used by it, but +; not on their respective unconditional arcs. It should not be +; optimized. +define void @test_pr30292(i1 %cond, i1 %cond2, i32 %a, i32 %b) { +entry: + %add1 = add i32 %a, 1 + br label %succ + +one: + br i1 %cond, label %two, label %succ + +two: + call void @g() + %add2 = add i32 %a, 1 + br label %succ + +succ: + %p = phi i32 [ 0, %entry ], [ %add1, %one ], [ %add2, %two ] + br label %one +} +declare void @g() + +; CHECK-LABEL: test_pr30292 +; CHECK: phi i32 [ 0, %entry ], [ %add1, %succ ], [ %add2, %two ] + +define zeroext i1 @test_pr30244(i1 zeroext %flag, i1 zeroext %flag2, i32 %blksA, i32 %blksB, i32 %nblks) { + +entry: + %p = alloca i8 + br i1 %flag, label %if.then, label %if.else + +if.then: + %cmp = icmp uge i32 %blksA, %nblks + %frombool1 = zext i1 %cmp to i8 + store i8 %frombool1, i8* %p + br label %if.end + +if.else: + br i1 %flag2, label %if.then2, label %if.end + +if.then2: + %add = add i32 %nblks, %blksB + %cmp2 = icmp ule i32 %add, %blksA + %frombool3 = zext i1 %cmp2 to i8 + store i8 %frombool3, i8* %p + br label %if.end + +if.end: + ret i1 true +} + +; CHECK-LABEL: @test_pr30244 +; CHECK: store +; CHECK-NOT: store + +define i32 @test_pr30373a(i1 zeroext %flag, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone + %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone + %z0 = lshr i32 %y0, 8 + br label %if.end + +if.else: + %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone + %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone + %z1 = lshr exact i32 %y1, 8 + br label %if.end + +if.end: + %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ] + %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ] + %ret = add i32 %xx, %yy + ret i32 %ret +} + +; CHECK-LABEL: test_pr30373a +; CHECK: lshr +; CHECK-NOT: exact +; CHECK: } + +define i32 @test_pr30373b(i1 zeroext %flag, i32 %x, i32 %y) { +entry: + br i1 %flag, label %if.then, label %if.else + +if.then: + %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone + %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone + %z0 = lshr exact i32 %y0, 8 + br label %if.end + +if.else: + %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone + %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone + %z1 = lshr i32 %y1, 8 + br label %if.end + +if.end: + %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ] + %yy = phi i32 [ %z0, %if.then ], [ %z1, %if.else ] + %ret = add i32 %xx, %yy + ret i32 %ret +} + +; CHECK-LABEL: test_pr30373b +; CHECK: lshr +; CHECK-NOT: exact +; CHECK: } + +; CHECK: !0 = !{!1, !1, i64 0} +; CHECK: !1 = !{!"float", !2} +; CHECK: !2 = !{!"an example type tree"} Index: llvm/trunk/test/Transforms/GVNSink/struct.ll =================================================================== --- llvm/trunk/test/Transforms/GVNSink/struct.ll +++ llvm/trunk/test/Transforms/GVNSink/struct.ll @@ -0,0 +1,71 @@ +; RUN: opt -gvn-sink -S < %s | FileCheck %s + +%struct = type {i32, i32} +%struct2 = type { [ 2 x i32], i32 } + +; Struct indices cannot be variant. + +; CHECK-LABEL: @f() { +; CHECK: getelementptr +; CHECK: getelementptr +define void @f() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = getelementptr inbounds %struct, %struct* null, i64 0, i32 1 + br label %bb4 + +bb2: ; preds = %bb + %tmp3 = getelementptr inbounds %struct, %struct* null, i64 0, i32 0 + br label %bb4 + +bb4: ; preds = %bb2, %bb1 + %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ] + ret void +} + +; Struct indices cannot be variant. + +; CHECK-LABEL: @g() { +; CHECK: getelementptr +; CHECK: getelementptr +define void @g() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 1 + br label %bb4 + +bb2: ; preds = %bb + %tmp3 = getelementptr inbounds %struct2, %struct2* null, i64 0, i32 0, i32 0 + br label %bb4 + +bb4: ; preds = %bb2, %bb1 + %tmp5 = phi i32 [ 1, %bb1 ], [ 0, %bb2 ] + ret void +} + + +; ... but the first parameter to a GEP can. + +; CHECK-LABEL: @h() { +; CHECK: getelementptr +; CHECK-NOT: getelementptr +define void @h() { +bb: + br i1 undef, label %bb2, label %bb1 + +bb1: ; preds = %bb + %tmp = getelementptr inbounds %struct, %struct* null, i32 0, i32 0 + br label %bb4 + +bb2: ; preds = %bb + %tmp3 = getelementptr inbounds %struct, %struct* null, i32 1, i32 0 + br label %bb4 + +bb4: ; preds = %bb2, %bb1 + %tmp5 = phi i32 [ 0, %bb1 ], [ 1, %bb2 ] + ret void +} \ No newline at end of file