diff --git a/llvm/include/llvm/Analysis/ConvergenceUtils.h b/llvm/include/llvm/Analysis/ConvergenceUtils.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/ConvergenceUtils.h @@ -0,0 +1,92 @@ +//===- ConvergenceUtils.h -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Convergence info and convergence-aware uniform info for LLVM IR +/// +/// This differs from traditional divergence analysis by taking convergence +/// intrinsics into account. +// +//===----------------------------------------------------------------------===// + +#ifndef CONVERGENCEUTILS_H +#define CONVERGENCEUTILS_H + +#include "CycleInfo.h" +#include "GenericConvergenceUtils.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstrTypes.h" + +namespace llvm { + +class TargetTransformInfo; + +/// \brief Convergence info for LLVM IR. +class ConvergenceInfo : public GenericConvergenceInfo { +public: + static ConvergenceInfo compute(const DominatorTree &domTree); + + static OperandBundleDef makeOperandBundleDef(ConvergentOperation *op); + + ConvergentOperation *createIntrinsic(ConvergentOperation::Kind kind, + ConvergentOperation *parent, + BasicBlock *block, + BasicBlock::iterator before, + const Twine &name = ""); +}; + +using ConvergentOperation = GenericConvergentOperation; + +/// Analysis pass which computes \ref ConvergenceInfo. +class ConvergenceInfoAnalysis + : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = ConvergenceInfo; + + /// Run the analysis pass over a function and produce a dominator tree. + Result run(Function &F, FunctionAnalysisManager &); +}; + +/// Printer pass for the \c ConvergenceInfo. +class ConvergenceInfoPrinterPass + : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit ConvergenceInfoPrinterPass(raw_ostream &OS); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy analysis pass which computes \ref ConvergenceInfo. +class ConvergenceInfoWrapperPass : public FunctionPass { + Function *m_function = nullptr; + ConvergenceInfo m_convergenceInfo; + +public: + static char ID; + + ConvergenceInfoWrapperPass(); + + ConvergenceInfo &getConvergenceInfo() { return m_convergenceInfo; } + const ConvergenceInfo &getConvergenceInfo() const { + return m_convergenceInfo; + } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + +} // namespace llvm + +#endif // CONVERGENCEUTILS_H diff --git a/llvm/include/llvm/Analysis/GenericConvergenceAnalysis.h b/llvm/include/llvm/Analysis/GenericConvergenceAnalysis.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/GenericConvergenceAnalysis.h @@ -0,0 +1,123 @@ +//===- GenericConvergenceAnalysis.h ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Generic convergence analysis +/// +/// Algorithm for filling a \ref GenericConvergenceInfo. +/// +/// Note that if you only want to use the _results_ of convergence analysis, you +/// do not need to include this file. In particular, no other header file should +/// include this header. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_GENERICCONVERGENCEANALYSIS_H +#define LLVM_GENERICCONVERGENCEANALYSIS_H + +#include "GenericConvergenceUtils.h" +#include "llvm/Support/GenericDomTree.h" + +#define DEBUG_TYPE "generic-convergence-analysis" + +namespace llvm { + +/// \brief Type-erased base class for convergence analysis. +class GenericConvergenceAnalysisBase { +private: + using ConvergentOperation = GenericConvergentOperationBase; + +protected: + using ConvergenceBlockInfo = GenericConvergenceInfoBase::ConvergenceBlockInfo; + +private: + const CfgInterface &m_iface; + GenericConvergenceInfoBase &m_convergenceInfo; + GenericCycleInfoBase &m_cycleInfo; + const GenericDominatorTreeBase &m_domTree; + + SmallVector, 16> + m_pendingExtensions; + DenseMap m_innermostExtension; + + SmallVector m_hearts; + +public: + GenericConvergenceAnalysisBase(const CfgInterface &iface, + GenericConvergenceInfoBase &convergenceInfo, + GenericCycleInfoBase &cycleInfo, + const GenericDominatorTreeBase &domTree) + : m_iface(iface), m_convergenceInfo(convergenceInfo), + m_cycleInfo(cycleInfo), m_domTree(domTree) {} + + GenericConvergenceAnalysisBase(const GenericConvergenceAnalysisBase &) = + delete; + GenericConvergenceAnalysisBase & + operator=(const GenericConvergenceAnalysisBase &) = delete; + + void run(); + + GenericConvergenceInfoBase &getConvergenceInfo() { return m_convergenceInfo; } + const GenericDominatorTreeBase &getDomTree() const { return m_domTree; } + +protected: + /// \brief Visit all convergent operations. + /// + /// The CFG-specific implementation must call \ref visitConvergentOperation + /// for all relevant operations. Parents (in terms of convergence control) + /// must be visited before children, and operations within the same block + /// must be visited in-order. + virtual void visitConvergentOperations() = 0; + + void visitConvergentOperation(ConvergentOperation *parent, + ConvergentOperation::Kind kind, + CfgBlockRef block, + CfgInstructionRef instruction); +}; + +/// \brief Convergence analysis implementation. +/// +/// Derive from this class using CRTP to implement the CFG- or target-specific +/// bits. +template +class GenericConvergenceAnalysis : public GenericConvergenceAnalysisBase { +public: + using CfgTraits = CfgTraitsT; + using BlockRef = typename CfgTraits::BlockRef; + using InstructionRef = typename CfgTraits::InstructionRef; + using Cycle = GenericCycle; + using ConvergenceInfo = GenericConvergenceInfo; + using ConvergentOperation = GenericConvergentOperation; + using DomTree = + DominatorTreeBase::element_type, + false>; + + GenericConvergenceAnalysis(GenericConvergenceInfo &convergenceInfo, + const DomTree &domTree) + : GenericConvergenceAnalysisBase(m_ifaceImpl, convergenceInfo, + convergenceInfo.getCycleInfo(), domTree), + m_ifaceImpl(CfgTraits::getBlockParent(domTree.getRoot())) {} + + ConvergenceInfo &getConvergenceInfo() { + return static_cast( + GenericConvergenceAnalysisBase::getConvergenceInfo()); + } + const DomTree &getDomTree() const { + return static_cast( + GenericConvergenceAnalysisBase::getDomTree()); + } + +protected: + CfgInterfaceImpl m_ifaceImpl; +}; + +} // namespace llvm + +#undef DEBUG_TYPE + +#endif // LLVM_GENERICCONVERGENCEANALYSIS_H diff --git a/llvm/include/llvm/Analysis/GenericConvergenceUtils.h b/llvm/include/llvm/Analysis/GenericConvergenceUtils.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Analysis/GenericConvergenceUtils.h @@ -0,0 +1,508 @@ +//===- GenericConvergenceUtils.h - ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Data structures and algorithms for analyses related to convergence. +/// +/// \section Convergence info +/// +/// \ref GenericConvergenceInfo provides convenient access to information about +/// how convergent operations relate to each other via convergence control +/// tokens. +/// +/// It contains: +/// - a summary of convergent operations in a tree structure according to +/// convergence control tokens +/// - a cycle info that may be adjusted to ensure that controlled convergent +/// operations are contained in the cycle in which their convergence control +/// token was defined +/// - information about the relevant heart in each cycle +/// +/// +/// \subsection Adjusted cycle info +/// +/// Example 1: +/// Suppose there is a self-loop at A with an anchor or loop heart intrinsic +/// inside the loop, and a convergent operation using that token in E. +/// +/// | +/// A] %a = anchor +/// | +/// B +/// |\ +/// | C +/// |/ \ +/// D | +/// | E %e = user (%a) +/// +/// The cycle info is adjusted so that A, B, and C are all part of the cycle +/// (but not D or E). Furthermore, the operation in E is marked as "end of +/// cycle". +/// +/// Example 2 (nested cycles): +/// +/// | +/// A<-\ %a = heart +/// | | +/// B] | %b = heart (%a) +/// | | +/// C>-/ +/// | +/// D %d1 = user (%b) +/// %d2 = user (%a) +/// +/// The inner cycle now contains blocks B and C, the outer cycle contains +/// A, B, and C. Both "user" convergent operations are marked as being an +/// "end of (their respective) cycle". +/// +/// +/// \subsection Hearts of cycles +/// +/// The effective heart of a cycle can be contained in a descendant cycle, if +/// the heart refers to a convergence control token that is defined outside. +/// +/// Example 1 (effective heart in a child cycle): +/// +/// | +/// A %a = anchor +/// | +/// /->B +/// | | +/// | C] %c = heart (%a) +/// | | +/// ^-B %b = anchor +/// | | +/// | C] %c = heart (%b) +/// | | +/// ^- class DominatorTreeBase; + +template class GenericConvergenceInfo; + +class GenericConvergentOperationBase { +public: + enum Kind { + /// Defines an initial convergence token referring to the function + /// entry / caller. + Entry, + + /// Defines an initial convergence token with implementation-defined + /// convergence. + Anchor, + + /// Loop heart intrinsic: this is where loop iterations are counted. + Heart, + + /// Defines a convergence token that is effectively a copy of its parent + /// (typically a pointless loop heart intrinsic). + Copy, + + /// Uses a convergence token but does not define one. + User, + + /// Uncontrolled intrinsic: neither uses nor defines a token. + Uncontrolled, + }; + +protected: + friend class GenericConvergenceAnalysisBase; + friend class GenericConvergenceInfoBase; + + CfgBlockRef m_block; + CfgInstructionRef m_instruction; + Kind m_kind; + + GenericCycleBase *m_cycle = nullptr; + + GenericConvergentOperationBase *m_parent = nullptr; + std::vector m_children; + +public: + GenericConvergentOperationBase(GenericConvergentOperationBase *parent, + Kind kind, CfgBlockRef block, + CfgInstructionRef instruction) + : m_block(block), m_instruction(instruction), m_kind(kind), + m_parent(parent) {} + + GenericConvergentOperationBase *getParent() const { return m_parent; } + CfgBlockRef getBlock() const { return m_block; } + Kind getKind() const { return m_kind; } + const GenericCycleBase *getCycle() const { return m_cycle; } + CfgInstructionRef getInstruction() const { return m_instruction; } + + using const_child_iterator = + std::vector::const_iterator; + + const_child_iterator children_begin() const { return m_children.begin(); } + const_child_iterator children_end() const { return m_children.end(); } + + iterator_range children() const { + return {children_begin(), children_end()}; + } +}; + +template +class GenericConvergentOperation : public GenericConvergentOperationBase { +private: + friend class GenericConvergenceInfo; + + // Adaptor which changes an iterator over GenericConvergentOperationBase * + // into an iterator over GenericConvergentOperation. + template struct cast_iterator_adaptor; + + template + using cast_iterator_adaptor_base = iterator_adaptor_base< + cast_iterator_adaptor, BaseIteratorT, + typename std::iterator_traits::iterator_category, + GenericConvergentOperation *, // value_type + typename std::iterator_traits::difference_type, + // pointer (not really usable, but we need to put something here) + GenericConvergentOperation **, + // reference (not a true reference, because operator* doesn't return one) + GenericConvergentOperation *>; + + template + struct cast_iterator_adaptor : cast_iterator_adaptor_base { + using Base = cast_iterator_adaptor_base; + + cast_iterator_adaptor() = default; + explicit cast_iterator_adaptor(BaseIteratorT &&it) + : Base(std::forward(it)) {} + + auto operator*() const { + return static_cast(*this->I); + } + }; + +public: + using CfgTraits = CfgTraitsT; + using BlockRef = typename CfgTraits::BlockRef; + using InstructionRef = typename CfgTraits::InstructionRef; + using ValueRef = typename CfgTraits::ValueRef; + + GenericConvergentOperation *getParent() const { + return static_cast(m_parent); + } + BlockRef getBlock() const { return CfgTraits::unwrapRef(m_block); } + const GenericCycle *getCycle() const { + return static_cast *>(m_cycle); + } + InstructionRef getInstruction() const { + return CfgTraits::unwrapRef(m_instruction); + } + + cast_iterator_adaptor + children_begin() const { + return {m_children.begin()}; + } + cast_iterator_adaptor + children_end() const { + return {m_children.end()}; + } + + auto children() const { + return llvm::make_range(children_begin(), children_end()); + } +}; + +/// \brief Type-erased information about CFG convergence. +class GenericConvergenceInfoBase { + friend class GenericConvergenceAnalysisBase; + +public: + using ConvergentOperation = GenericConvergentOperationBase; + +protected: + struct ConvergenceBlockInfo { + /// Convergent operations in the block, in order. + std::vector operations; + }; + + DenseMap> + m_operation; + DenseMap m_block; + DenseMap m_heart; + std::vector m_roots; + + const ConvergenceBlockInfo &lookupBlock(CfgBlockRef block) const { + auto blockIt = m_block.find(block); + if (blockIt != m_block.end()) + return blockIt->second; + + static ConvergenceBlockInfo empty; + return empty; + } + + GenericConvergenceInfoBase(const GenericConvergenceInfoBase &other) = delete; + GenericConvergenceInfoBase & + operator=(const GenericConvergenceInfoBase &other) = delete; + +public: + GenericConvergenceInfoBase() = default; + GenericConvergenceInfoBase(GenericConvergenceInfoBase &&other) = default; + GenericConvergenceInfoBase & + operator=(GenericConvergenceInfoBase &&other) = default; + + void clear(); + + CfgBlockRef getHeartBlock(const GenericCycleBase *cycle) const; + GenericConvergentOperationBase *getOperation(CfgInstructionRef instruction); + + using const_root_iterator = + std::vector::const_iterator; + + const_root_iterator roots_begin() const { return m_roots.begin(); } + const_root_iterator roots_end() const { return m_roots.end(); } + + auto roots() const { return llvm::make_range(roots_begin(), roots_end()); } + + using const_block_iterator = + std::vector::const_iterator; + + const_block_iterator block_begin(CfgBlockRef block) const { + return lookupBlock(block).operations.begin(); + } + const_block_iterator block_end(CfgBlockRef block) const { + return lookupBlock(block).operations.end(); + } + + auto block(CfgBlockRef block) const { + const ConvergenceBlockInfo &blockInfo = lookupBlock(block); + return llvm::make_range(blockInfo.operations.begin(), + blockInfo.operations.end()); + } + + bool validate(const GenericCycleInfoBase &cycleInfo) const; + void print(const CfgPrinter &printer, const GenericCycleInfoBase &cycleInfo, + raw_ostream &out) const; + + // Updating the convergence info based on changes made to the IR. + // + // Note: Changes that would extend/shrink cycles are currently _not_ + // supported here at all! + ConvergentOperation * + insertOperation(const CfgInterface &iface, GenericCycleInfoBase &cycleInfo, + ConvergentOperation *parent, ConvergentOperation::Kind kind, + CfgBlockRef block, CfgInstructionRef instruction); + void eraseOperation(GenericCycleInfoBase &cycleInfo, ConvergentOperation *op); + +private: + void registerHeart(ConvergentOperation *heart); + void unregisterHeart(ConvergentOperation *heart); +}; + +/// \brief Base class for CFG-specific convergence info. +/// +/// Derive from this class using CRTP and implement the CFG-specific bits of +/// the analysis. +template +class GenericConvergenceInfo : public GenericConvergenceInfoBase { +public: + using CfgTraits = CfgTraitsT; + using BlockRef = typename CfgTraits::BlockRef; + using InstructionRef = typename CfgTraits::InstructionRef; + using CycleInfo = GenericCycleInfo; + using ConvergentOperation = GenericConvergentOperation; + +private: + CycleInfo m_cycleInfo; + +public: + void clear() { + GenericConvergenceInfoBase::clear(); + m_cycleInfo.reset(); + } + + CycleInfo &getCycleInfo() { return m_cycleInfo; } + const CycleInfo &getCycleInfo() const { return m_cycleInfo; } + + BlockRef getHeartBlock(const GenericCycle *cycle) const { + return CfgTraits::unwrapRef( + GenericConvergenceInfoBase::getHeartBlock(cycle)); + } + + ConvergentOperation *getOperation(InstructionRef instruction) { + return static_cast( + GenericConvergenceInfoBase::getOperation( + CfgTraits::wrapRef(instruction))); + } + + using const_root_iterator = + typename ConvergentOperation::template cast_iterator_adaptor< + GenericConvergenceInfoBase::const_root_iterator>; + + auto roots_begin() const { return const_root_iterator{m_roots.begin()}; } + auto roots_end() const { return const_root_iterator{m_roots.end()}; } + + auto roots() const { return llvm::make_range(roots_begin(), roots_end()); } + + using const_block_iterator = + typename ConvergentOperation::template cast_iterator_adaptor< + GenericConvergenceInfoBase::const_block_iterator>; + + auto block_begin(BlockRef block) const { + return const_block_iterator{ + GenericConvergenceInfoBase::block_begin(CfgTraits::wrapRef(block))}; + } + auto block_end(BlockRef block) const { + return const_block_iterator{ + GenericConvergenceInfoBase::block_end(CfgTraits::wrapRef(block))}; + } + + auto block(BlockRef block) const { + auto range = GenericConvergenceInfoBase::block(CfgTraits::wrapRef(block)); + return llvm::make_range(const_block_iterator(range.begin()), + const_block_iterator(range.end())); + } + + bool validate() const { + return GenericConvergenceInfoBase::validate(m_cycleInfo); + } + void print(raw_ostream &out) const { + GenericConvergenceInfoBase::print( + CfgPrinterImpl(CfgInterfaceImpl( + CfgTraits::getBlockParent(m_cycleInfo.getRoot()->getHeader()))), + m_cycleInfo, out); + } + LLVM_DUMP_METHOD + void dump() const { print(dbgs()); } + + // Updating the convergence info based on changes made to the IR. + // + // Note: Changes that would extend/shrink cycles are currently _not_ + // supported here at all! + ConvergentOperation *insertOperation(ConvergentOperation *parent, + typename ConvergentOperation::Kind kind, + BlockRef block, + InstructionRef instruction) { + return static_cast( + GenericConvergenceInfoBase::insertOperation( + CfgInterfaceImpl(CfgTraits::getBlockParent(block)), + m_cycleInfo, parent, kind, CfgTraits::wrapRef(block), + CfgTraits::wrapRef(instruction))); + } + + void eraseOperation(ConvergentOperation *op) { + GenericConvergenceInfoBase::eraseOperation(m_cycleInfo, op); + } +}; + +/// \brief Type-erased heart-adjusted post order. +/// +/// The heart-adjusted _reverse_ post order is used as traversal order during +/// uniform analysis and the reconvergence transform. +/// +/// The heart-adjusted RPO differs from the regular RPO in that cycles are +/// treated as contracted nodes which are later expanded as an RPO starting at +/// the heart. For cycles without an effective heart, we start at the header. +/// +/// These modifications help the reconvergence transform ensure that the +/// wave-level CFG has the required convergence guarantees when post-dominator +/// reconvergence is used, and they help the uniform analysis determine +/// uniformity in essentially a single pass while being consistent with the +/// same convergence guarantees. +/// +/// Note that this function computes the _non-reversed_ heart-adjusted +/// post order, so you'd typically use it as llvm::reverse(hapo). +class HeartAdjustedPostOrderBase { + std::vector m_order; + +public: + using const_iterator = std::vector::const_iterator; + + bool empty() const { return m_order.empty(); } + size_t size() const { return m_order.size(); } + + void clear() { m_order.clear(); } + void compute(const CfgInterface &iface, + const GenericConvergenceInfoBase &convergenceInfo, + const GenericCycleInfoBase &cycleInfo, + const GenericDominatorTreeBase &domTree); + + const_iterator begin() const { return m_order.begin(); } + const_iterator end() const { return m_order.end(); } + CfgBlockRef operator[](size_t idx) const { return m_order[idx]; } +}; + +/// \brief Convergence-adjusted post order. +template +class HeartAdjustedPostOrder : public HeartAdjustedPostOrderBase { +public: + using CfgTraits = CfgTraitsT; + using BlockRef = typename CfgTraits::BlockRef; + using DomTree = + DominatorTreeBase::element_type, + false>; + + void compute(const GenericConvergenceInfo &convergenceInfo, + const DomTree &domTree) { + HeartAdjustedPostOrderBase::compute( + CfgInterfaceImpl( + CfgTraits::getBlockParent(domTree.getRoot())), + convergenceInfo, convergenceInfo.getCycleInfo(), domTree); + } + + auto begin() const { + return CfgTraits::unwrapIterator(HeartAdjustedPostOrderBase::begin()); + } + auto end() const { + return CfgTraits::unwrapIterator(HeartAdjustedPostOrderBase::end()); + } + + BlockRef operator[](size_t idx) const { + return CfgTraits::unwrapRef(HeartAdjustedPostOrderBase::operator[](idx)); + } +}; + +} // namespace llvm + +#endif // LLVM_GENERICCONVERGENCEUTILS_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -115,6 +115,7 @@ void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); void initializeControlHeightReductionLegacyPassPass(PassRegistry&); +void initializeConvergenceInfoWrapperPassPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCrossDSOCFIPass(PassRegistry&); diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -35,6 +35,7 @@ initializeCFGOnlyPrinterLegacyPassPass(Registry); initializeCFLAndersAAWrapperPassPass(Registry); initializeCFLSteensAAWrapperPassPass(Registry); + initializeConvergenceInfoWrapperPassPass(Registry); initializeCycleInfoWrapperPassPass(Registry); initializeDependenceAnalysisWrapperPassPass(Registry); initializeDelinearizationPass(Registry); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -38,6 +38,7 @@ CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp + ConvergenceUtils.cpp CycleInfo.cpp DDG.cpp Delinearization.cpp @@ -51,6 +52,8 @@ DominanceFrontier.cpp EHPersonalities.cpp FunctionPropertiesAnalysis.cpp + GenericConvergenceAnalysis.cpp + GenericConvergenceUtils.cpp GenericCycleInfo.cpp GlobalsModRef.cpp GuardUtils.cpp diff --git a/llvm/lib/Analysis/ConvergenceUtils.cpp b/llvm/lib/Analysis/ConvergenceUtils.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/ConvergenceUtils.cpp @@ -0,0 +1,202 @@ +//===- ConvergenceUtils.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConvergenceUtils.h" + +#include "llvm/Analysis/GenericConvergenceAnalysis.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/AbstractCallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +namespace { + +/// \brief Analysis for computing convergence info on LLVM IR. +class ConvergenceAnalysis + : public GenericConvergenceAnalysis { +public: + using Base = GenericConvergenceAnalysis; + + ConvergenceAnalysis(ConvergenceInfo &convergenceInfo, + const DominatorTree &domTree) + : Base(convergenceInfo, domTree) {} + + /// Visit all convergence-relevant intrinsics in depth-first order. + void visitConvergentOperations() final { + for (BlockRef block : depth_first(getDomTree().getRoot())) { + for (Instruction &instr : *block) { + if (auto call = dyn_cast(&instr)) { + if (!call->isConvergent()) + continue; + + ConvergentOperation *parent = nullptr; + auto ctrl = call->getOperandBundle(LLVMContext::OB_convergencectrl); + if (ctrl) { + assert(ctrl.getValue().Inputs.size() == 1); + Instruction *parentInstr = + cast(ctrl.getValue().Inputs[0].get()); + parent = getConvergenceInfo().getOperation(parentInstr); + assert(parent != nullptr); + } + + ConvergentOperation::Kind kind = + parent ? ConvergentOperation::User + : ConvergentOperation::Uncontrolled; + + if (auto *intrinsic = dyn_cast(&instr)) { + switch (intrinsic->getIntrinsicID()) { + case Intrinsic::experimental_convergence_entry: + kind = ConvergentOperation::Entry; + break; + case Intrinsic::experimental_convergence_anchor: + kind = ConvergentOperation::Anchor; + break; + case Intrinsic::experimental_convergence_loop: + kind = ConvergentOperation::Heart; + break; + } + } + + visitConvergentOperation(parent, kind, CfgTraits::wrapRef(block), + CfgTraits::wrapRef(&instr)); + } + } + } + } +}; + +} // anonymous namespace + +/// \brief Compute the convergence info for an LLVM IR function. +ConvergenceInfo ConvergenceInfo::compute(const DominatorTree &domTree) { + ConvergenceInfo info; + ConvergenceAnalysis analysis(info, domTree); + analysis.run(); + return info; +} + +/// Return the operand bundle to be used for referring to the convergence +/// control token produced by \op. +OperandBundleDef +ConvergenceInfo::makeOperandBundleDef(ConvergentOperation *op) { + assert(op->getKind() == ConvergentOperation::Entry || + op->getKind() == ConvergentOperation::Anchor || + op->getKind() == ConvergentOperation::Heart || + op->getKind() == ConvergentOperation::Copy); + + Instruction *instr = cast(op->getInstruction()); + Value *token[1] = {instr}; + assert(token[0]->getType()->isTokenTy()); + + return OperandBundleDef{"convergencectrl", token}; +} + +/// Insert an entry/anchor/loop heart intrinsic at the given position. +/// +/// Updates the convergence info. +ConvergentOperation *ConvergenceInfo::createIntrinsic( + ConvergentOperation::Kind kind, ConvergentOperation *parent, + BasicBlock *block, BasicBlock::iterator before, const Twine &name) { + IRBuilder<> builder(block, before); + + Intrinsic::ID iid; + switch (kind) { + case ConvergentOperation::Entry: + iid = Intrinsic::experimental_convergence_entry; + break; + case ConvergentOperation::Anchor: + iid = Intrinsic::experimental_convergence_anchor; + break; + case ConvergentOperation::Heart: + iid = Intrinsic::experimental_convergence_loop; + break; + default: + // Can't create User etc. here + llvm_unreachable("bad convergent operation kind"); + } + + Module *module = block->getModule(); + Function *fn = Intrinsic::getDeclaration(module, iid, {}); + SmallVector bundles; + + if (parent) + bundles.emplace_back(makeOperandBundleDef(parent)); + + CallInst *call = + builder.CreateCall(fn->getFunctionType(), fn, {}, bundles, name); + + return insertOperation(parent, kind, block, call); +} + +//===----------------------------------------------------------------------===// +// ConvergenceInfoAnalysis and related pass implementations +//===----------------------------------------------------------------------===// + +ConvergenceInfoAnalysis::Result +ConvergenceInfoAnalysis::run(Function &F, FunctionAnalysisManager &FAM) { + auto &domTree = FAM.getResult(F); + return ConvergenceInfo::compute(domTree); +} + +AnalysisKey ConvergenceInfoAnalysis::Key; + +ConvergenceInfoPrinterPass::ConvergenceInfoPrinterPass(raw_ostream &OS) + : OS(OS) {} + +PreservedAnalyses ConvergenceInfoPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &result = AM.getResult(F); + OS << "ConvergenceInfo for function: " << F.getName() << '\n'; + result.print(OS); + return PreservedAnalyses::all(); +} + +//===----------------------------------------------------------------------===// +// ConvergenceInfoWrapperPass Implementation +//===----------------------------------------------------------------------===// + +char ConvergenceInfoWrapperPass::ID = 0; + +ConvergenceInfoWrapperPass::ConvergenceInfoWrapperPass() : FunctionPass(ID) { + initializeConvergenceInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +INITIALIZE_PASS_BEGIN(ConvergenceInfoWrapperPass, "convergenceinfo", + "Convergence Info Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(ConvergenceInfoWrapperPass, "convergenceinfo", + "Convergence Info Analysis", true, true) + +void ConvergenceInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} + +bool ConvergenceInfoWrapperPass::runOnFunction(Function &F) { + m_convergenceInfo.clear(); + + auto &domTree = getAnalysis().getDomTree(); + + m_function = &F; + m_convergenceInfo = ConvergenceInfo::compute(domTree); + return false; +} + +void ConvergenceInfoWrapperPass::print(raw_ostream &OS, const Module *) const { + OS << "ConvergenceInfo for function: " << m_function->getName() << "\n"; + m_convergenceInfo.print(OS); +} + +void ConvergenceInfoWrapperPass::releaseMemory() { + m_convergenceInfo.clear(); + m_function = nullptr; +} diff --git a/llvm/lib/Analysis/GenericConvergenceAnalysis.cpp b/llvm/lib/Analysis/GenericConvergenceAnalysis.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/GenericConvergenceAnalysis.cpp @@ -0,0 +1,216 @@ +//===- GenericConvergenceAnalysis.cpp -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Implementation of generic convergence analysis. +/// +/// Convergence analysis collects convergence intrinsics for quick reference and +/// adjusts the cycle structure of a CycleInfo that is local to the convergence +/// info to reflect the effects of convergence intrinsics that enforce +/// non-reconvergence. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GenericConvergenceAnalysis.h" + +#define DEBUG_TYPE "generic-convergence-analysis" + +using namespace llvm; + +void GenericConvergenceAnalysisBase::run() { + // Compute our own cycle info since we will modify it based on convergence + // intrinsics. + m_cycleInfo.compute(m_iface, m_domTree.getRootNode()->getBlock()); + + // Step 1: CFG-specific program traversal to collect all relevant operations + // and their linkage. + visitConvergentOperations(); + + // Step 2: Work through the cycle extensions. + SmallVector extendedBlocks; + + while (!m_pendingExtensions.empty()) { + CfgBlockRef const extendBlock = m_pendingExtensions.back().first; + GenericCycleBase *const extendCycle = m_pendingExtensions.back().second; + m_pendingExtensions.pop_back(); + + GenericCycleBase *&innermost = m_innermostExtension[extendBlock]; + if (innermost && m_cycleInfo.contains(extendCycle, innermost)) { + // Already done (possibly implicitly) by a previous extension of the same + // block. + continue; + } + innermost = extendCycle; + + if (extendCycle->containsBlock(extendBlock)) { + // Already done by a previous extension of a different block. + continue; + } + + m_cycleInfo.extendCycle(m_iface, extendCycle, extendBlock, &extendedBlocks); + + // Update an operation that should now be considered to be part of the + // given extended cycle. + auto updateOperation = [this](ConvergentOperation *op, + GenericCycleBase *extendCycle) { + if (extendCycle == op->m_cycle) + return; + + if (m_cycleInfo.contains(op->m_cycle, extendCycle)) { + // Handle a case such as the following, where the operation was + // assigned to an ancestor of extendCycle: + // + // | + // A] <-- extendCycle + // | + // B <-- op + // | + // C <-- extendBlock + // | + // + // Due to the nesting rule for convergence regions, this can + // only happen when `op` itself is anchored in B, which means that + // it cannot be a true heart. + assert(op->getKind() != ConvergentOperation::Heart); + op->m_cycle = extendCycle; + + SmallVector stack; + for (;;) { + for (ConvergentOperation *child : op->m_children) { + if (child->getKind() == ConvergentOperation::Heart) + continue; + if (child->m_cycle == extendCycle) + continue; // already updated? + + child->m_cycle = extendCycle; + if (!child->m_children.empty()) + stack.push_back(child); + + CfgBlockRef block = child->m_block; + if (!extendCycle->containsBlock(block)) + m_pendingExtensions.emplace_back(block, extendCycle); + } + + if (stack.empty()) + break; + op = stack.pop_back_val(); + } + } else { + // The cycle with which this operation is associated must now be + // a descendant of the extended cycle, due to the nesting rule for + // convergence regions. + assert(m_cycleInfo.contains(extendCycle, op->m_cycle)); + } + }; + + if (!llvm::is_contained(extendedBlocks, extendBlock)) { + auto blockIt = m_convergenceInfo.m_block.find(extendBlock); + if (blockIt != m_convergenceInfo.m_block.end()) { + GenericCycleBase *cycle = nullptr; + for (ConvergentOperation *op : + llvm::reverse(m_convergenceInfo.m_block[extendBlock].operations)) { + if (cycle) + updateOperation(op, cycle); + cycle = op->m_cycle; + } + } + } + + for (CfgBlockRef block : extendedBlocks) { + auto blockIt = m_convergenceInfo.m_block.find(block); + if (blockIt == m_convergenceInfo.m_block.end()) + continue; + + for (ConvergentOperation *op : blockIt->second.operations) + updateOperation(op, extendCycle); + } + extendedBlocks.clear(); + } + + // Step 3: Register hearts within their final relevant cycles. + for (ConvergentOperation *heart : m_hearts) + m_convergenceInfo.registerHeart(heart); + + assert(m_convergenceInfo.validate(m_cycleInfo)); +} + +/// \brief Callback for \ref visitConvergentOperations +/// +/// Record the encountered operation in the tree, in the m_operations map, +/// and in the per-block list. +/// +/// Also record the +void GenericConvergenceAnalysisBase::visitConvergentOperation( + ConvergentOperation *parent, ConvergentOperation::Kind kind, + CfgBlockRef block, CfgInstructionRef instruction) { + assert((kind == ConvergentOperation::Anchor || + kind == ConvergentOperation::Entry || + kind == ConvergentOperation::Uncontrolled) == !parent); + + auto op = + std::make_unique(parent, kind, block, instruction); + + if (parent) { + parent->m_children.push_back(op.get()); + } else { + m_convergenceInfo.m_roots.push_back(op.get()); + } + + op->m_cycle = m_cycleInfo.getCycle(block); + + if (parent) { + GenericCycleBase *parentCycle = parent->m_cycle; + bool isTrueHeart = false; + + if (parentCycle != op->m_cycle) { + if (m_cycleInfo.contains(parentCycle, op->m_cycle)) { + // Static validation rule: every cycle that contains a non-heart use of + // a token must also contain its definition -- so a valid program can + // only have a heart here, and it can't have more than one in this + // cycle. + assert(kind == ConvergentOperation::Heart); + isTrueHeart = true; + } else { + // Cycle extensions may be required for hearts. Example: + // + // | + // A] %outer = loop heart <-- parent + // | + // B] %inner = loop heart (%outer) <-- op + // | + // + // In this case, parentCycle is the cycle headed by A, and we are going + // to extend it to include the entire cycle headed by B, but we don't + // update op->m_cycle. + if (kind == ConvergentOperation::Heart) { + isTrueHeart = !m_cycleInfo.contains(op->m_cycle, parentCycle); + if (!isTrueHeart) + op->m_kind = kind = ConvergentOperation::Copy; + } + + if (kind != ConvergentOperation::Heart) + op->m_cycle = parentCycle; + + m_pendingExtensions.emplace_back(block, parentCycle); + } + } + + if (kind == ConvergentOperation::Heart) { + if (isTrueHeart) { + m_hearts.push_back(op.get()); + } else { + op->m_kind = ConvergentOperation::Copy; + } + } + } + + m_convergenceInfo.m_block[block].operations.push_back(op.get()); + + assert(!m_convergenceInfo.m_operation.count(instruction)); + m_convergenceInfo.m_operation[instruction] = std::move(op); +} diff --git a/llvm/lib/Analysis/GenericConvergenceUtils.cpp b/llvm/lib/Analysis/GenericConvergenceUtils.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Analysis/GenericConvergenceUtils.cpp @@ -0,0 +1,476 @@ +//===- GenericConvergenceUtils.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GenericConvergenceUtils.h" + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// \brief Clear all information / reset this structure to the original state +void GenericConvergenceInfoBase::clear() { + m_block.clear(); + m_heart.clear(); + m_operation.clear(); +} + +/// \brief Return the given cycle's heart block, or null if it has none. +CfgBlockRef +GenericConvergenceInfoBase::getHeartBlock(const GenericCycleBase *cycle) const { + auto heartIt = m_heart.find(cycle); + if (heartIt != m_heart.end()) + return heartIt->second->getBlock(); + return {}; +} + +/// \brief Return the metadata for the given intrinsic. +GenericConvergentOperationBase * +GenericConvergenceInfoBase::getOperation(CfgInstructionRef instruction) { + auto it = m_operation.find(instruction); + if (it == m_operation.end()) + return nullptr; + return it->second.get(); +} + +/// \brief Perform some sanity-checks on the convergence info. +/// +/// This validation is not exhaustive. For example, it does not check that +/// independently anchored trees of intrinsics are well-nested. +bool GenericConvergenceInfoBase::validate( + const GenericCycleInfoBase &cycleInfo) const { + DenseSet allOps; + DenseSet seen; + + auto reportError = [](const char *file, int line, const char *cond) { + errs() << file << ':' << line + << ": GenericConvergenceInfo::validate: " << cond << '\n'; + }; +#define check(cond) do { \ + if (!(cond)) { \ + reportError(__FILE__, __LINE__, #cond); \ + return false; \ + } \ + } while (false) + + check(cycleInfo.validateTree()); + + for (const auto &op : m_operation) { + check(op.first == op.second->m_instruction); + check(allOps.insert(op.second.get()).second); + } + + for (ConvergentOperation *op : allOps) { + bool parentExpected = op->getKind() == ConvergentOperation::Heart || + op->getKind() == ConvergentOperation::Copy || + op->getKind() == ConvergentOperation::User; + check((op->getParent() != nullptr) == parentExpected); + check(!op->getParent() || allOps.count(op->getParent())); + check(op->getParent() || llvm::is_contained(m_roots, op)); + + for (ConvergentOperation *child : op->children()) { + check(allOps.count(child)); + check(child->getParent() == op); + check(seen.insert(child).second); // duplicate child listing? + } + seen.clear(); + + if (op->getParent()) { + bool isSameCycle = op->getParent()->getCycle() == op->getCycle(); + bool expectSameCycle = op->getKind() != ConvergentOperation::Heart; + check(isSameCycle == expectSameCycle); + check(cycleInfo.contains(op->getParent()->getCycle(), op->getCycle())); + } + + auto blockIt = m_block.find(op->getBlock()); + check(blockIt != m_block.end() && + llvm::is_contained(blockIt->second.operations, op)); + + if (op->getKind() == ConvergentOperation::Heart) { + for (const GenericCycleBase *cycle = op->m_cycle; + cycle != op->m_parent->m_cycle; cycle = cycle->getParent()) { + auto heartIt = m_heart.find(cycle); + check(heartIt != m_heart.end() && heartIt->second == op); + } + } + } + + for (const auto &block : m_block) { + check(!block.second.operations.empty()); // unnecessary allocation? + + const GenericCycleBase *cycle = nullptr; + + for (ConvergentOperation *op : block.second.operations) { + check(allOps.count(op)); + check(op->getBlock() == block.first); + check(seen.insert(op).second); // duplicate listing in block? + + check(!cycle || cycleInfo.contains(op->m_cycle, cycle)); + cycle = op->m_cycle; + } + seen.clear(); + + check(cycleInfo.contains(cycleInfo.getCycle(block.first), cycle)); + } + + for (const auto &heart : m_heart) { + check(allOps.count(heart.second)); + check(heart.second->getKind() == ConvergentOperation::Heart); + check(cycleInfo.contains(heart.first, heart.second->m_cycle)); + } + + for (ConvergentOperation *root : m_roots) { + check(allOps.count(root)); + check(!root->getParent()); + check(seen.insert(root).second); // duplicate listing of a root? + } + seen.clear(); + +#undef check + + return true; +} + +/// \brief Print convergence info to \p out. +void GenericConvergenceInfoBase::print(const CfgPrinter &printer, + const GenericCycleInfoBase &cycleInfo, + raw_ostream &out) const { + out << "Convergence-adjusted cycles:\n"; + cycleInfo.print(printer, out); + + out << "Convergent operations:\n"; + + SmallVector, 8> stack; + for (ConvergentOperation *root : llvm::reverse(m_roots)) + stack.emplace_back(root, 1); + + while (!stack.empty()) { + auto current = stack.pop_back_val(); + + for (unsigned i = 0; i < current.second; ++i) + out << " "; + + switch (current.first->getKind()) { + case ConvergentOperation::Entry: + out << "(entry) "; + break; + case ConvergentOperation::Anchor: + out << "(anchor) "; + break; + case ConvergentOperation::Heart: + out << "(heart) "; + break; + case ConvergentOperation::Copy: + out << "(copy) "; + break; + case ConvergentOperation::User: + out << "(user) "; + break; + case ConvergentOperation::Uncontrolled: + out << "(uncontrolled) "; + break; + } + + printer.printBlockName(out, current.first->getBlock()); + const GenericCycleBase *cycle = current.first->getCycle(); + if (!cycle->isRoot()) { + out << " (cycle=" << current.first->getCycle()->print(printer) << ')'; + } + out << ": "; + printer.printInstruction(out, current.first->m_instruction); + out << '\n'; + + for (const ConvergentOperation *child : + llvm::reverse(current.first->children())) + stack.emplace_back(child, current.second + 1); + } +} + +/// Insert a new convergent operation into the convergence info. +/// +/// Call this after creating a new operation to preserve the analysis result. +GenericConvergentOperationBase *GenericConvergenceInfoBase::insertOperation( + const CfgInterface &iface, GenericCycleInfoBase &cycleInfo, + ConvergentOperation *parent, ConvergentOperation::Kind kind, + CfgBlockRef block, CfgInstructionRef instruction) { + ConvergenceBlockInfo &blockInfo = m_block[block]; + + auto insertIt = llvm::lower_bound( + blockInfo.operations, instruction, + [&iface](ConvergentOperation *lhs, CfgInstructionRef rhs) { + return iface.comesBefore(lhs->getInstruction(), rhs); + }); + + auto op = + std::make_unique(parent, kind, block, instruction); + ConvergentOperation *retOp = op.get(); + + if (parent) { + if (kind != ConvergentOperation::Heart) { + op->m_cycle = parent->m_cycle; + } else { + op->m_cycle = cycleInfo.getCycle(block); + + registerHeart(op.get()); + } + + // This assertion triggers if the operation is either entirely misplaced + // or requires a cycle extension -- preserving the convergence analysis in + // the latter case is unlikely to ever be required and hasn't been + // implemented. + assert(op->m_cycle == cycleInfo.getCycle(block) || + (insertIt != blockInfo.operations.end() && + cycleInfo.contains(op->m_cycle, (*insertIt)->m_cycle))); + + parent->m_children.push_back(op.get()); + } else { + if (insertIt != blockInfo.operations.end()) + op->m_cycle = (*insertIt)->m_cycle; + else + op->m_cycle = cycleInfo.getCycle(block); + + m_roots.push_back(op.get()); + } + + blockInfo.operations.insert(insertIt, op.get()); + + assert(!m_operation.count(instruction)); + m_operation.try_emplace(instruction, std::move(op)); + + assert(validate(cycleInfo)); + + return retOp; +} + +/// Erase the given operation from the convergence info. +void GenericConvergenceInfoBase::eraseOperation(GenericCycleInfoBase &cycleInfo, + ConvergentOperation *op) { + assert(op->m_children.empty() && + "children must be erased before their parents"); + + auto blockIt = m_block.find(op->getBlock()); + assert(blockIt != m_block.end()); + + auto opIt = llvm::find(blockIt->second.operations, op); + assert(opIt != blockIt->second.operations.end()); + + // This assertion triggers if erasing this operation causes a cycle extension + // to collapse -- handling this case would be required e.g. if we wanted to + // preserve convergence analysis during dead-code elimination. + // + // Note that this assertion doesn't capture all cases of collapsing cycle + // extensions. Consider: + // + // | + // A] %a = loop heart + // | + // B] %b = loop heart (%a) + // | + // + // The loop heart %b causes an extension of cycle headed by A to fully + // include B, and removing %b causes that extension to collapse (unless some + // other use of %a keeps it alive). However, this case is difficult to + // detect, and does not trigger this assertion. + assert(cycleInfo.getCycle(op->getBlock()) == op->m_cycle || + (opIt != blockIt->second.operations.begin() && + cycleInfo.contains(op->m_cycle, (*(opIt - 1))->m_cycle)) || + (opIt + 1 != blockIt->second.operations.end() && + cycleInfo.contains(op->m_cycle, (*(opIt + 1))->m_cycle))); + + blockIt->second.operations.erase(opIt); + + if (blockIt->second.operations.empty()) + m_block.erase(blockIt); + + if (op->m_parent) { + op->m_parent->m_children.erase(llvm::find(op->m_parent->m_children, op)); + } else { + m_roots.erase(llvm::find(m_roots, op)); + } + + if (op->getKind() == ConvergentOperation::Heart) + unregisterHeart(op); + + // Delete the operation. + assert(m_operation[op->m_instruction].get() == op); + m_operation.erase(op->m_instruction); + + assert(validate(cycleInfo)); +} + +/// Helper that adds \p heart to m_heart for all relevant cycles. +void GenericConvergenceInfoBase::registerHeart(ConvergentOperation *heart) { + assert(heart->getKind() == ConvergentOperation::Heart); + for (GenericCycleBase *cycle = heart->m_cycle; + cycle != heart->m_parent->m_cycle; cycle = cycle->getParent()) { + assert(!m_heart.count(cycle)); + m_heart.try_emplace(cycle, heart); + } +} + +/// Helper that removes \p heart from m_heart for all relevant cycles. +void GenericConvergenceInfoBase::unregisterHeart(ConvergentOperation *heart) { + assert(heart->getKind() == ConvergentOperation::Heart); + for (GenericCycleBase *cycle = heart->m_cycle; + cycle != heart->m_parent->m_cycle; cycle = cycle->getParent()) { + assert(m_heart[cycle] == heart); + m_heart.erase(cycle); + } +} + +/// \brief Generically compute the heart-adjusted post order. +void HeartAdjustedPostOrderBase::compute( + const CfgInterface &iface, + const GenericConvergenceInfoBase &convergenceInfo, + const GenericCycleInfoBase &cycleInfo, + const GenericDominatorTreeBase &domTree) { + // In our forward traversal, the modification bullets from the description of + // heart-adjusted reverse post order happen in reverse: within each + // cycle, we do a depth-first post-order traversal of only the blocks + // belonging to the cycle, starting with the heart. + // + // The depth-first search mainly uses a stack of blocks, with a look-aside + // stack of cycles. Cycles remain on the stack until their final post-order + // visit, at which time their blocks are added to the parent cycle's order. + // We also maintain a linked list of cycles that are active in the sense that + // we're currently visiting blocks inside them. + struct Cycle { + const GenericCycleBase *cycle; + CfgBlockRef heart; + unsigned parentStackIdx; + std::vector order; + SmallVector postponedBlocks; + + explicit Cycle(const GenericCycleBase *cycle, CfgBlockRef heart, + unsigned parentStackIdx) + : cycle(cycle), heart(heart), parentStackIdx(parentStackIdx) {} + }; + + DenseSet visitedBlocks; + SmallVector blockStack; + // doneIdxStack contains ((size of blockStack before pop) << 1) | isCycleHeart + SmallVector doneIdxStack; + SmallVector cycleStack; + unsigned currentCycleStackIdx = 0; + + CfgBlockRef entryBlock = domTree.getRootNode()->getBlock(); + cycleStack.emplace_back(cycleInfo.getRoot(), CfgBlockRef{}, 0); + blockStack.push_back(entryBlock); + + // The entry block is not marked as a cycle header, so that we don't attempt + // to pop the root cycle: it is handled at the very end after the loop. + doneIdxStack.push_back(blockStack.size() << 1); + iface.appendSuccessors(entryBlock, blockStack); + + do { + CfgBlockRef block = blockStack.back(); + unsigned doneBack = doneIdxStack.back(); + + if (blockStack.size() == (doneBack >> 1)) { + if (!(doneBack & 1)) { + // Post-order visit of a regular block. + cycleStack[currentCycleStackIdx].order.push_back(block); + blockStack.pop_back(); + doneIdxStack.pop_back(); + continue; + } + + // This is the post-order visit of an effective cycle heart. + Cycle &cycle = cycleStack.back(); + if (currentCycleStackIdx == cycleStack.size() - 1) + currentCycleStackIdx = cycle.parentStackIdx; + + if (!cycle.postponedBlocks.empty()) { + // Enqueue the cycle's postponed exit blocks if there are any. In this + // case, we aren't actually at the post-order visit of the cycle yet, + // if we interpret it as a contracted node contained in its parent. + for (CfgBlockRef postponed : cycle.postponedBlocks) { + assert(visitedBlocks.count(postponed)); + visitedBlocks.erase(postponed); + blockStack.push_back(postponed); + } + cycle.postponedBlocks.clear(); + continue; + } + + // True post-order visit: collect all of the cycle. + cycle.order.push_back(block); + blockStack.pop_back(); + doneIdxStack.pop_back(); + + auto &parentOrder = cycleStack[cycle.parentStackIdx].order; + parentOrder.insert(parentOrder.end(), cycle.order.begin(), + cycle.order.end()); + cycleStack.pop_back(); + continue; + } + + if (!visitedBlocks.insert(block).second) { + blockStack.pop_back(); + continue; // already visited this one + } + + // Pre-order visit of the block. + const GenericCycleBase *currentCycle = + cycleStack[currentCycleStackIdx].cycle; + CfgBlockRef currentHeart = cycleStack[currentCycleStackIdx].heart; + const GenericCycleBase *blockCycle = cycleInfo.getCycle(block); + + if (blockCycle == currentCycle || + (currentHeart && + currentHeart == convergenceInfo.getHeartBlock(blockCycle))) { + doneIdxStack.push_back(blockStack.size() << 1); + iface.appendSuccessors(block, blockStack); + continue; + } + + if (cycleInfo.contains(currentCycle, blockCycle)) { + // Entering a child cycle. In the case of irreducible control flow, + // blockCycle might not be a direct child -- find it. + while ((blockCycle->getParent() != currentCycle) && + (!currentHeart || currentHeart != convergenceInfo.getHeartBlock( + blockCycle->getParent()))) + blockCycle = blockCycle->getParent(); + + CfgBlockRef heart = convergenceInfo.getHeartBlock(blockCycle); + CfgBlockRef effectiveHeart = heart ? heart : blockCycle->getHeader(); + + cycleStack.emplace_back(blockCycle, heart, currentCycleStackIdx); + currentCycleStackIdx = cycleStack.size() - 1; + + // Fixup state as-if we're visiting the effective heart. + if (block != effectiveHeart) { + blockStack.pop_back(); + blockStack.push_back(effectiveHeart); + visitedBlocks.erase(block); + visitedBlocks.insert(effectiveHeart); + } + + doneIdxStack.push_back((blockStack.size() << 1) | 1); + iface.appendSuccessors(block, blockStack); + continue; + } + + // This block is not contained in the current cycle; we have to postpone it. + blockStack.pop_back(); + + Cycle *postponeCycle = &cycleStack[currentCycleStackIdx]; + for (;;) { + Cycle *parent = &cycleStack[postponeCycle->parentStackIdx]; + if (cycleInfo.contains(parent->cycle, blockCycle)) + break; + postponeCycle = parent; + } + postponeCycle->postponedBlocks.push_back(block); + } while (!blockStack.empty()); + + assert(cycleStack.size() == 1); + m_order = std::move(cycleStack[0].order); +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/CFLSteensAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/ConvergenceUtils.h" #include "llvm/Analysis/CycleInfo.h" #include "llvm/Analysis/DDG.h" #include "llvm/Analysis/DemandedBits.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -126,6 +126,7 @@ FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) +FUNCTION_ANALYSIS("convergenceinfo", ConvergenceInfoAnalysis()) FUNCTION_ANALYSIS("cycleinfo", CycleInfoAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis()) @@ -233,6 +234,7 @@ FUNCTION_PASS("print", BlockFrequencyPrinterPass(dbgs())) FUNCTION_PASS("print", BranchProbabilityPrinterPass(dbgs())) FUNCTION_PASS("print", DependenceAnalysisPrinterPass(dbgs())) +FUNCTION_PASS("print", ConvergenceInfoPrinterPass(dbgs())) FUNCTION_PASS("print", CycleInfoPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) diff --git a/llvm/test/Analysis/ConvergenceInfo/basic.ll b/llvm/test/Analysis/ConvergenceInfo/basic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/ConvergenceInfo/basic.ll @@ -0,0 +1,356 @@ +; RUN: opt < %s -convergenceinfo -analyze | FileCheck %s -check-prefix=CHECK +; RUN: opt < %s -disable-output -passes='print' 2>&1 | FileCheck %s -check-prefix=CHECK + +define void @empty() { +; CHECK-LABEL: ConvergenceInfo for function: empty +; CHECK-NEXT: Convergence-adjusted cycles: +; CHECK-NEXT: Convergent operations: + + ret void +} + +define void @simple() { +; CHECK-LABEL: ConvergenceInfo for function: simple +; CHECK: Convergence-adjusted cycles: +; CHECK: Convergent operations: +; CHECK: (entry) entry: %tok0 = call token @llvm.experimental.convergence.entry() +; CHECK: (user) entry: call void @convergent.op(i32 0) [ "convergencectrl"(token %tok0) ] +; CHECK: (user) then: call void @convergent.op(i32 1) [ "convergencectrl"(token %tok0) ] +; CHECK: (anchor) then: %tok1 = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) then: call void @convergent.op(i32 2) [ "convergencectrl"(token %tok1) ] + +entry: + %tok0 = call token @llvm.experimental.convergence.entry() + call void @convergent.op(i32 0) [ "convergencectrl"(token %tok0) ] + br i1 undef, label %then, label %end + +then: + call void @convergent.op(i32 1) [ "convergencectrl"(token %tok0) ] + + %tok1 = call token @llvm.experimental.convergence.anchor() + call void @convergent.op(i32 2) [ "convergencectrl"(token %tok1) ] + br label %end + +end: + ret void +} + +define void @loops() { +; CHECK-LABEL: ConvergenceInfo for function: loops +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(outer_hdr) inner_hdr inner_next +; CHECK: depth=2: entries(inner_hdr) inner_next +; CHECK: Convergent operations: +; CHECK: (anchor) entry: %anchor = call token @llvm.experimental.convergence.anchor() +; CHECK: (heart) outer_hdr (cycle=depth=1: entries(outer_hdr) inner_hdr inner_next): %outer = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] +; CHECK: (heart) inner_hdr (cycle=depth=2: entries(inner_hdr) inner_next): %inner = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %outer) ] +; CHECK: (user) inner_hdr (cycle=depth=2: entries(inner_hdr) inner_next): call void @convergent.op(i32 0) [ "convergencectrl"(token %inner) ] + +entry: + %anchor = call token @llvm.experimental.convergence.anchor() + br label %outer_hdr + +outer_hdr: + %outer = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] + br label %inner_hdr + +inner_hdr: + %inner = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %outer) ] + call void @convergent.op(i32 0) [ "convergencectrl"(token %inner) ] + br i1 undef, label %outer_hdr, label %inner_next + +inner_next: + br i1 undef, label %inner_hdr, label %end + +end: + ret void +} + +; +; | +; A] %a = anchor +; | +; B +; |\ +; | C +; |/ \ +; D | +; E %e = user (%a) +; +define void @extend_loops() { +; CHECK-LABEL: ConvergenceInfo for function: extend_loops +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) C B +; CHECK: Convergent operations: +; CHECK: (anchor) A (cycle=depth=1: entries(A) C B): %a = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) E (cycle=depth=1: entries(A) C B): call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] + +entry: + br label %A + +A: + %a = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %A, label %B + +B: + br i1 undef, label %C, label %D + +C: + br i1 undef, label %D, label %E + +D: + ret void + +E: + call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] + ret void +} + +; +; | +; A] %a = anchor +; | +; B %b = anchor +; / \ +; C \ +; / \ | +; D | | +; E | %e = user (%b) +; | +; F %f = user (%a) +; +define void @extend_loops_iterate() { +; CHECK-LABEL: ConvergenceInfo for function: extend_loops_iterate +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) B C +; CHECK: Convergent operations: +; CHECK: (anchor) A (cycle=depth=1: entries(A) B C): %a = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) F (cycle=depth=1: entries(A) B C): call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] +; CHECK: (anchor) B (cycle=depth=1: entries(A) B C): %b = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) E (cycle=depth=1: entries(A) B C): call void @convergent.op(i32 0) [ "convergencectrl"(token %b) ] + +entry: + br label %A + +A: + %a = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %A, label %B + +B: + %b = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %C, label %F + +C: + br i1 undef, label %D, label %E + +D: + ret void + +E: + call void @convergent.op(i32 0) [ "convergencectrl"(token %b) ] + ret void + +F: + call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] + ret void +} + +; +; | +; A<-\ %a = heart +; | | +; B] | %b = heart (%a) +; | | +; C>-/ +; | +; D %d1 = user (%b) +; %d2 = user (%a) +; +define void @nested_loop_extension() { +; CHECK-LABEL: ConvergenceInfo for function: nested_loop_extension +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) C B +; CHECK: depth=2: entries(B) C +; CHECK: Convergent operations: +; CHECK: (anchor) entry: %anchor = call token @llvm.experimental.convergence.anchor() +; CHECK: (heart) A (cycle=depth=1: entries(A) C B): %a = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] +; CHECK: (heart) B (cycle=depth=2: entries(B) C): %b = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] +; CHECK: (user) D (cycle=depth=2: entries(B) C): call void @convergent.op(i32 0) [ "convergencectrl"(token %b) ] +; CHECK: (user) D (cycle=depth=1: entries(A) C B): call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] +entry: + %anchor = call token @llvm.experimental.convergence.anchor() + br label %A + +A: + %a = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] + br label %B + +B: + %b = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] + br i1 undef, label %B, label %C + +C: + br i1 undef, label %A,label %D + +D: + call void @convergent.op(i32 0) [ "convergencectrl"(token %b) ] + call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] + ret void +} + +; +; | +; A] %a = anchor +; | +; B %b1 = anchor <-- should be associated to extended cycle! +; |\ %b2 = user (%b1) +; | X +; C %c = user (%a) +; +define void @multi_block_extension() { +; CHECK-LABEL: ConvergenceInfo for function: multi_block_extension +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) B +; CHECK: Convergent operations: +; CHECK: (anchor) A (cycle=depth=1: entries(A) B): %a = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) C (cycle=depth=1: entries(A) B): call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] +; CHECK: (anchor) B (cycle=depth=1: entries(A) B): %b1 = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) B (cycle=depth=1: entries(A) B): call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + +entry: + br label %A + +A: + %a = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %A, label %B + +B: + %b1 = call token @llvm.experimental.convergence.anchor() + call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + br i1 undef, label %X, label %C + +X: + ret void + +C: + call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] + ret void +} + +; +; | +; A] %a = anchor +; | +; B %b1 = anchor <-- should be associated to extended cycle! +; %b2 = user (%b1) +; %b3 = user (%a) +; +define void @multi_extension() { +; CHECK-LABEL: ConvergenceInfo for function: multi_extension +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) +; CHECK: Convergent operations: +; CHECK: (anchor) A (cycle=depth=1: entries(A)): %a = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) B (cycle=depth=1: entries(A)): call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] +; CHECK: (anchor) B (cycle=depth=1: entries(A)): %b1 = call token @llvm.experimental.convergence.anchor() +; CHECK: (user) B (cycle=depth=1: entries(A)): call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + +entry: + br label %A + +A: + %a = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %A, label %B + +B: + %b1 = call token @llvm.experimental.convergence.anchor() + call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + call void @convergent.op(i32 0) [ "convergencectrl"(token %a) ] + ret void +} + +; +; | +; A] %a = anchor +; | +; B] %b1 = loop heart (%a) +; | %b2 = user (%b1) +; | +; C +; +define void @lift_loop() { +; CHECK-LABEL: ConvergenceInfo for function: lift_loop +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) B +; CHECK: depth=2: entries(B) +; CHECK: Convergent operations: +; CHECK: (anchor) A (cycle=depth=1: entries(A) B): %a = call token @llvm.experimental.convergence.anchor() +; CHECK: (heart) B (cycle=depth=2: entries(B)): %b1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] +; CHECK: (user) B (cycle=depth=2: entries(B)): call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + +entry: + br label %A + +A: + %a = call token @llvm.experimental.convergence.anchor() + br i1 undef, label %A, label %B + +B: + %b1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] + call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + br i1 undef, label %B, label %C + +C: + ret void +} + +define void @false_heart_trivial() { +; CHECK-LABEL: ConvergenceInfo for function: false_heart_trivial +; CHECK: Convergence-adjusted cycles: +; CHECK: Convergent operations: +; CHECK: (entry) entry: %a = call token @llvm.experimental.convergence.entry() +; CHECK: (copy) entry: %b = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] +; CHECK: (user) entry: call void @convergent.op(i32 0) [ "convergencectrl"(token %b) ] +entry: + %a = call token @llvm.experimental.convergence.entry() + %b = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] + call void @convergent.op(i32 0) [ "convergencectrl"(token %b) ] + ret void +} + +; +; | +; A] %a = loop heart +; | +; B %b1 = false heart (%a) +; %b2 = user (%b1) +; +define void @false_heart_lifted() { +; CHECK-LABEL: ConvergenceInfo for function: false_heart_lifted +; CHECK: Convergence-adjusted cycles: +; CHECK: depth=1: entries(A) +; CHECK: Convergent operations: +; CHECK: (anchor) entry: %anchor = call token @llvm.experimental.convergence.anchor() +; CHECK: (heart) A (cycle=depth=1: entries(A)): %a = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] +; CHECK: (copy) B (cycle=depth=1: entries(A)): %b1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] +; CHECK: (user) B (cycle=depth=1: entries(A)): call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + +entry: + %anchor = call token @llvm.experimental.convergence.anchor() + br label %A + +A: + %a = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %anchor) ] + br i1 undef, label %A, label %B + +B: + %b1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %a) ] + call void @convergent.op(i32 0) [ "convergencectrl"(token %b1) ] + ret void +} + +declare void @convergent.op(i32) convergent + +declare token @llvm.experimental.convergence.entry() +declare token @llvm.experimental.convergence.anchor() +declare token @llvm.experimental.convergence.loop()