diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -205,6 +205,7 @@ void initializeInternalizeLegacyPassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); void initializeJumpThreadingPass(PassRegistry&); +void initializeAJTLegacyPassPass(PassRegistry&); void initializeLCSSAVerificationPassPass(PassRegistry&); void initializeLCSSAWrapperPassPass(PassRegistry&); void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h --- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -157,6 +157,7 @@ bool DisableTailCalls; bool DisableUnrollLoops; bool CallGraphProfile; + bool EnableAJT; bool SLPVectorize; bool LoopVectorize; bool LoopsInterleaved; diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -240,6 +240,14 @@ FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false, int Threshold = -1); +//===----------------------------------------------------------------------===// +// +// AggressiveJumpThreading - This pass performs more aggressive 'jump threading' +// than the JumpThreading pass. The pass can jump over loop heads. +// As a result it can create unnatural loops. +// +FunctionPass *createAJTPass(); + //===----------------------------------------------------------------------===// // // CFGSimplification - Merge basic blocks, eliminate unreachable blocks, diff --git a/llvm/include/llvm/Transforms/Scalar/AJT.h b/llvm/include/llvm/Transforms/Scalar/AJT.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/AJT.h @@ -0,0 +1,30 @@ +//===- AJT.h - Aggressive jump threading ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Aggressive jump threading pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_AJT_H +#define LLVM_TRANSFORMS_SCALAR_AJT_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +// SDCOMP-28505 +struct AJTPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_AJT_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -126,6 +126,7 @@ #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/AJT.h" #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" #include "llvm/Transforms/Scalar/CallSiteSplitting.h" @@ -243,6 +244,10 @@ "enable-npm-gvn-sink", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); +static cl::opt EnableAJT( + "enable-npm-ajt", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Enable AJT for the new PM (default = on)")); + static cl::opt EnableUnrollAndJam( "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden, cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)")); @@ -1187,6 +1192,12 @@ // Enhance/cleanup vector code. OptimizePM.addPass(VectorCombinePass()); + + if (Level == OptimizationLevel::O3 && EnableAJT) { + OptimizePM.addPass(AJTPass()); + OptimizePM.addPass(CorrelatedValuePropagationPass()); + } + OptimizePM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any parallel diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -180,6 +180,7 @@ FUNCTION_PASS("adce", ADCEPass()) FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass()) FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass()) +FUNCTION_PASS("ajt", AJTPass()) FUNCTION_PASS("assume-builder", AssumeBuilderPass()) FUNCTION_PASS("assume-simplify", AssumeSimplifyPass()) FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -170,6 +170,10 @@ clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); +static cl::opt EnableAggressiveJumpThreading( + "aggressive-jump-threading", cl::init(true), cl::Hidden, + cl::desc("Enable the Aggressive Jump Threading Pass")); + extern cl::opt EnableKnowledgeRetention; PassManagerBuilder::PassManagerBuilder() { @@ -201,6 +205,7 @@ PerformThinLTO = EnablePerformThinLTO; DivergentTarget = false; CallGraphProfile = true; + EnableAJT = EnableAggressiveJumpThreading; } PassManagerBuilder::~PassManagerBuilder() { @@ -812,6 +817,10 @@ MPM.add(createVectorCombinePass()); addExtensionsToPM(EP_Peephole, MPM); + if (OptLevel > 2 && EnableAJT) { + MPM.add(createAJTPass()); + MPM.add(createCorrelatedValuePropagationPass()); + } MPM.add(createInstructionCombiningPass()); if (EnableUnrollAndJam && !DisableUnrollLoops) { diff --git a/llvm/lib/Transforms/Scalar/AJT.cpp b/llvm/lib/Transforms/Scalar/AJT.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/AJT.cpp @@ -0,0 +1,1518 @@ +//===- AJT.cpp - Thread control through conditional blocks ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Aggressive Jump Threading pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/AJT.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" + +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "ajt" + +#ifndef NDEBUG +static const char AJTDbgMsgPrefix[] = "AJT: "; +static const bool ReportTODO = false; +#endif + +namespace { + +/// This class represents a point of the path consisting of phi-functions which +/// a constant goes through. +/// Const1 -> phi1 -> phi2 -> ... -> phiN -> nullptr +/// Example: +/// 1 3 4 5 +/// | | | | +/// v v v v +/// ----- ----- +/// | | +/// v v +/// phi1 phi2 2 +/// | | | +/// v v v +/// ---------------- +/// | +/// v +/// phi3 +/// There are five paths for constants: +/// 1 -> phi1 -> phi3 +/// 2 -> phi3 +/// 3 -> phi1 -> phi3 +/// 4 -> phi2 -> phi3 +/// 5 -> phi2 -> phi3 +/// There are three phi-functions which are points in data flow paths: phi1, phi2, phi3 +/// We just put the tree above into std::list: point and its parent. +/// There is information which points are start points (leaves in tree). +/// It is stored in IncomingConstantsContainer. +class PhiDataFlowPoint { + PHINode *Phi; + PhiDataFlowPoint *NextPoint; + +public: + PhiDataFlowPoint(PHINode *PHI, PhiDataFlowPoint *NextPoint) + : Phi(PHI), NextPoint(NextPoint) {} + + PHINode *getPhi() { return Phi; } + + PhiDataFlowPoint *getNextPoint() { return NextPoint; } +}; + +/// This pass performs more aggressive jump threading than the JumpThreading +/// pass. The pass can jump over loop heads. As a result it can create +/// unnatural loops. +/// +/// Supported terminators: SwitchInst and BranchInst with integer equality +/// comparison. +/// +/// Main idea: +/// - if we know all constants incoming into a basic block and successors for +/// them we can jump over this basic block to the corresponding successor. +/// - as a source block of constant might not be an immediate predecessor of +/// of the basic block all basic block the constant passes through must be +/// copied to preserve CFG and DFG correctness. +/// +/// Start function: run +class AJT { +public: + typedef SmallVector< + std::tuple, 16> + IncomingConstantsContainer; + + AJT(TargetLibraryInfo *TLI, TargetTransformInfo *TTI) : TLI(TLI), TTI(TTI) {} + + bool run(Function &F); + +private: + typedef SmallPtrSet SkipBlocksSet; + typedef SetVector OriginalBlocksSet; + + /// This is an abstract class which defines context needed to optimise + /// terminator instructions: + /// - what constant are collected + /// - successors chosen for constants + /// - when it's profitable to transform + class TerminatorContext { + public: + IncomingConstantsContainer &getIncomingConstants() { + return IncomingConstants; + } + virtual BasicBlock *getTerminatorSuccForConstant(ConstantInt *C) = 0; + void collectIncomingConstants(); + Instruction *getTerminator() { return Terminator; } + PHINode *getStartingPoint() { return StartingPoint; } + + protected: + TerminatorContext(PHINode *Phi, Instruction *T) : StartingPoint(Phi), Terminator(T) { + assert(StartingPoint); + assert(Terminator); + } + virtual ~TerminatorContext() {} + TerminatorContext(const TerminatorContext &) = delete; + TerminatorContext& operator=(const TerminatorContext &) = delete; + private: + PHINode *StartingPoint; + Instruction *Terminator; + IncomingConstantsContainer IncomingConstants; + std::list DataFlowPoints; + }; + + /// Define context needed to optimise switch instructions: + /// - successors chosen for constants + /// - when it's profitable to transform + class SwitchInstContext: public TerminatorContext { + public: + explicit SwitchInstContext(PHINode *StartingPoint, SwitchInst *I) : TerminatorContext(StartingPoint, I) { + } + SwitchInstContext(const SwitchInstContext&) = delete; + SwitchInstContext& operator=(const SwitchInstContext&) = delete; + + BasicBlock *getTerminatorSuccForConstant(ConstantInt *C) { + assert(C); + return cast(getTerminator())->findCaseValue(C)->getCaseSuccessor(); + } + }; + + OriginalBlocksSet OriginalBlocks; + SkipBlocksSet SkipConstantSources; + std::list UnfoldedOriginalBlocks; + std::list DeletedBlocksBin; + const TargetLibraryInfo *TLI = nullptr; + const TargetTransformInfo *TTI = nullptr; + int BasicBlockCopiesCounter = 0; + + bool isSupported(const BasicBlock &B); + void init(Function &F); + void createOriginalBlocksSet(Function &F); + bool isBlockAllowedToCopy(BasicBlock &BB); + bool applyAJT(BasicBlock &BB); + void deleteDeadBlock(BasicBlock *BB); + void mergeBlockWithSCPred(BasicBlock &BB); + void changeBlockSuccToSuccOfSucc(BasicBlock *BB, BasicBlock *OldSucc, + BasicBlock *NewSucc); + bool redirectCFPathFromBlockToItsSuccForConst(BasicBlock *Block, + BasicBlock *BlockSucc, + ConstantInt *C, + BasicBlock *ConstSourceBlock, + PhiDataFlowPoint *DataFlowStartPoint); + void changeBlockSuccToNewSuccCopy(BasicBlock *BB, + BasicBlock *Succ, + BasicBlock *SuccCopy, + ValueToValueMapTy &V2VMap); + bool createAlternativeCFPath(TerminatorContext &TerminatorCtxt); + void simplifyCFG(Function &F); + void + findSelectInstructionsToUnfoldRec(PHINode *Phi, + SmallPtrSetImpl &SeenPhis, + SetVector &SelectInsts); + bool tryToUnfoldSelectInstructions(PHINode &StartPhi); + PHINode *tryToUnfoldSelectInstruction(SelectInst &SI); + void correctInstructionsAfterCopying(BasicBlock *OriginalBlock, + BasicBlock *BlockCopy, + BasicBlock *PredToRedirect, + ValueToValueMapTy &V2VMap); + void eraseDeletedBlocks() { + while (!DeletedBlocksBin.empty()) { + BasicBlock *B = DeletedBlocksBin.front(); + removeFromOriginalBlocks(B); + SkipConstantSources.erase(B); + DeletedBlocksBin.pop_front(); + } + } + void removeFromOriginalBlocks(const BasicBlock *BB); + bool isAmountOfInstructionsToCopyBig(SmallVectorImpl &BlocksToCopy); + bool isBlockDeleted(BasicBlock *BB) { + return (std::find(DeletedBlocksBin.begin(), DeletedBlocksBin.end(), BB) + != DeletedBlocksBin.end()); + } +}; + +class AJTLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification + + AJTLegacyPass() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + auto *TLI = &getAnalysis().getTLI(F); + auto *TTI = &getAnalysis().getTTI(F); + + return AJT(TLI, TTI).run(F); + } + + StringRef getPassName() const override { return "Aggressive Jump Threading"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LCSSAID); + AU.addPreserved(); + } +}; +} + +char AJTLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(AJTLegacyPass, "ajt", "Aggressive Jump Threading", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) +INITIALIZE_PASS_END(AJTLegacyPass, "ajt", "Aggressive Jump Threading", false, + false) + +FunctionPass *llvm::createAJTPass() { return new AJTLegacyPass(); } + +static bool isBlockUnreachable(BasicBlock *B) { + assert(B); + return (pred_empty(B) || B->getUniquePredecessor() == B); +} + +static const unsigned CopyableBlockMaxSuccessors = 3; + +/// Returns true if the specified block is allowed to copy. +bool AJT::isBlockAllowedToCopy(BasicBlock &BB) { + assert(TTI); + /*for (auto &Inst: BB.getInstList()) { + // copying calls might increase pressure on hardware branch predictors. + if (const auto *Call = dyn_cast(&Inst)) { + if (TargetTransformInfo::TCC_Free != + TTI->getUserCost(Call, TargetTransformInfo::TCK_SizeAndLatency)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << "Not TCC_Free call: " << *Call << "\n"); + return false; + } + } + }*/ + + //copying such might increase pressure on hardware branch predictors. + //return (BB.getTerminator()->getNumSuccessors() <= CopyableBlockMaxSuccessors); + return true; +} + +/// Blocks without incoming edges can be deleted as they are unreachable. +void AJT::deleteDeadBlock(BasicBlock *BB) { + assert(BB); + assert(isBlockUnreachable(BB) && "The block must not have predecessor."); + + DeleteDeadBlock(BB); + DeletedBlocksBin.push_back(BB); +} + +void AJT::removeFromOriginalBlocks(const BasicBlock *BB) { + if (std::find(OriginalBlocks.begin(), OriginalBlocks.end(), BB) != + OriginalBlocks.end()) + OriginalBlocks.erase( + std::find(OriginalBlocks.begin(), OriginalBlocks.end(), BB)); +} + +// Check if the specified basic block is singly connected to its predecessor. +static bool isPredecessorSinglyConnectedTo(const BasicBlock &BB) { + if (const BasicBlock *SinglePred = BB.getSinglePredecessor()) { + if (SinglePred->getTerminator()->getNumSuccessors() == 1 && + !BB.hasAddressTaken()) { + assert(SinglePred != &BB && "Unreachable single node loop."); + return true; + } + } + return false; +} + +/// Merge the specified basic block with its singly connected predecessor. +void AJT::mergeBlockWithSCPred(BasicBlock &BB) { + assert(isPredecessorSinglyConnectedTo(BB)); + removeFromOriginalBlocks(BB.getSinglePredecessor()); + SkipConstantSources.erase(BB.getSinglePredecessor()); + MergeBasicBlockIntoOnlyPred(&BB); +} + +/// Collect constants and their paths through phi-functions +/// +/// \param CurPhi the current analysed phi-function +/// \param SeenPhis seen phi-functions +/// \param IncomingConstants the container where found incoming constants +/// and their start point to phi-functions are stored +/// \param DataFlowPoints storage for path consisting of linked phi-functions +/// \param NextPoint as constant is looked in up directions NextPoint is +/// a phi-function next to the current phi-function in down direction from the +/// source block of constant to the current block. +/// +/// \returns true if constants have been found +static bool collectConstants(PHINode *CurPhi, + SmallPtrSetImpl &SeenPhis, + AJT::IncomingConstantsContainer &IncomingConstants, + std::list &DataFlowPoints, + PhiDataFlowPoint *NextPoint) { + assert(CurPhi); + if (!SeenPhis.insert(CurPhi).second) + return false; + + // phi-functions might have several same incoming blocks + SmallPtrSet SeenBlocks; + SmallSetVector UniquePhis; // UniquePhis must have a deterministic ordered set + SmallVector InConstantsIds; + for (unsigned I = 0, E = CurPhi->getNumIncomingValues(); I != E; ++I) { + if (!SeenBlocks.insert(CurPhi->getIncomingBlock(I)).second) { + continue; + } + + Value *InValue = CurPhi->getIncomingValue(I); + if (isa(InValue)) { + InConstantsIds.push_back(I); + } else if (PHINode *Phi = dyn_cast(InValue)) { + if (!UniquePhis.insert(Phi) && !SeenPhis.count(Phi)) { + assert(!ReportTODO && "Implement collecting constants for CF like diamonds etc."); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Unsupported CF: phi-function " + << CurPhi->getName() << " has phi-function " << Phi->getName() << " incoming from different basic blocks.\n"); + + return false; + } + } + } + + DataFlowPoints.push_back(PhiDataFlowPoint{CurPhi, NextPoint});// create a new point + PhiDataFlowPoint *CurrPoint = &DataFlowPoints.back(); + + bool FoundConstants = !InConstantsIds.empty(); + for (unsigned Id: InConstantsIds) { + IncomingConstants.push_back(std::make_tuple(cast(CurPhi->getIncomingValue(Id)), + CurPhi->getIncomingBlock(Id), + CurrPoint)); + } + + // FIXME: The ability of performing optimizations depends on the order we iterate + for (PHINode *Phi: UniquePhis) { + FoundConstants |= collectConstants(Phi, SeenPhis, IncomingConstants, + DataFlowPoints, CurrPoint); + } + + if (!FoundConstants) + DataFlowPoints.pop_back(); + + return FoundConstants; +} + +/// Change terminator target. +static int changeteTerminatorTarget(Instruction *T, BasicBlock *OldSucc, + BasicBlock *NewSucc) { + assert(T); + assert(OldSucc); + assert(NewSucc); + int NumEdgesToNewSucc = 0; + for (unsigned I = 0, E = T->getNumSuccessors(); I < E; ++I) { + if (T->getSuccessor(I) == OldSucc) { + T->setSuccessor(I, NewSucc); + ++NumEdgesToNewSucc; + } + } + return NumEdgesToNewSucc; +} + +/// \brief Change the successor of the block from the current successor to the +/// successor of the current successor. Update phi-funcions. +/// +/// \param[in,out] BB the basic block which successor is changed +/// \param[in,out] CurBBSucc the current BB successor +/// \param[in,out] CurBBSuccSucc the successor of the successor of BB +void AJT::changeBlockSuccToSuccOfSucc(BasicBlock *BB, BasicBlock *CurBBSucc, + BasicBlock *CurBBSuccSucc) { + assert(BB); + assert(CurBBSucc); + assert(CurBBSuccSucc); + assert(std::find(succ_begin(CurBBSucc), succ_end(CurBBSucc), CurBBSuccSucc) != + succ_end(CurBBSucc)); + assert(std::find(succ_begin(BB), succ_end(BB), CurBBSucc) != succ_end(BB)); + + int NumEdgesToCurBBSuccFromBB = changeteTerminatorTarget(BB->getTerminator(), CurBBSucc, CurBBSuccSucc); + assert(NumEdgesToCurBBSuccFromBB >= 1); + + // update phi-functions of CurBBSuccSucc to have an incoming value from BB. + for (auto I = CurBBSuccSucc->begin(); isa(I);) { + PHINode *Phi = cast(I++); + if (Phi->use_empty()) { + Phi->eraseFromParent(); + continue; + } + + // Find a value coming from CurBBSucc. This value needs to come from BB + // through all edges from BB to CurBBSuccSucc. + for (unsigned Id = 0, IdE = Phi->getNumIncomingValues(); Id < IdE; ++Id) { + if (Phi->getIncomingBlock(Id) != CurBBSucc) + continue; + + Value *V = Phi->getIncomingValue(Id); + if (PHINode *InPhi = dyn_cast(V)) { + if (InPhi->getParent() == CurBBSucc) { + V = InPhi->getIncomingValueForBlock(BB); + assert(V); + } + } + // If there are different values coming from BB they need to be changed to + // V. + for (unsigned Id1 = 0; Id1 < IdE; ++Id1) { + if (Phi->getIncomingBlock(Id1) == BB && + Phi->getIncomingValue(Id1) != V) { + Phi->setIncomingValue(Id1, V); + } + } + for (int i = 0; i < NumEdgesToCurBBSuccFromBB; ++i) { + Phi->addIncoming(V, BB); + } + break; + } + } + + if (isBlockUnreachable(CurBBSucc)) { + for (auto I = CurBBSucc->begin(); isa(I);) { + PHINode *Phi = cast(I++); + + assert(Phi->getBasicBlockIndex(BB) != -1); + + if (!Phi->use_empty()) { + Value *V = Phi->getIncomingValue(0); + Phi->replaceAllUsesWith(V); + } + Phi->eraseFromParent(); + } + } else { + // correct SSA form as phi-functions in CurBBSucc are changed + SSAUpdater SSAUp; + SmallVector UsesToRewrite; + for (auto I = CurBBSucc->begin(); isa(I);) { + PHINode *Phi = cast(I++); + + if (Phi->use_empty()) { + Phi->eraseFromParent(); + continue; + } + + assert(Phi->getBasicBlockIndex(BB) != -1); + + // As BB is disconnected from CurBBSucc all incoming values from BB + // must be removed from Phi. + const bool DontDeletePHIIfEmpty = false; + Value *V = Phi->removeIncomingValue(BB, DontDeletePHIIfEmpty); + for (int i = 1; i < NumEdgesToCurBBSuccFromBB; ++i) { + Phi->removeIncomingValue(BB, DontDeletePHIIfEmpty); + } + if (!Phi->getNumOperands()) { + Phi->replaceAllUsesWith(UndefValue::get(Phi->getType())); + Phi->eraseFromParent(); + continue; + } + + assert(Phi->getBasicBlockIndex(BB) == -1); + + // Find external uses of Phi + for (Use &U : Phi->uses()) { + if (Instruction *User = dyn_cast(U.getUser())) { + if (PHINode *UserPN = dyn_cast(User)) { + if (UserPN->getIncomingBlock(U) == CurBBSucc) + continue; + } else if (User->getParent() == CurBBSucc) { + continue; + } + + UsesToRewrite.push_back(&U); + } + } + + if (UsesToRewrite.empty()) + continue; + + SSAUp.Initialize(V->getType(), V->getName()); + SSAUp.AddAvailableValue(BB, V); // V comes form BB + SSAUp.AddAvailableValue(CurBBSucc, Phi); // Phi comes form CurBBSucc + while (!UsesToRewrite.empty()) + SSAUp.RewriteUse(*UsesToRewrite.pop_back_val()); + } + } +} + +/// \brief Correct instructions in OriginalBlock, its copy and successors of OriginalBlock. +/// +/// \param[in,out] OriginalBlock the original block +/// \param[in,out] BlockCopy the copy of OriginalBlock +/// \param[in] PredToRedirect the predecessor of OriginalBlock which is redirected to the copy +/// \param[in] V2VMap mapping between instructions from OriginalBlock and their copies from BlockCopy +void AJT::correctInstructionsAfterCopying(BasicBlock *OriginalBlock, + BasicBlock *BlockCopy, + BasicBlock *PredToRedirect, + ValueToValueMapTy &V2VMap) { + assert(OriginalBlock); + assert(BlockCopy); + assert(PredToRedirect); + + // update phi-functions in OriginalBlock and BlockCopy: + // - for phi-functions from OriginalBlock incoming values from PredToRedirect are removed + // - for phi-functions from BlockCopy incoming values from PredToRedirect are added + for (auto I = OriginalBlock->begin(); isa(I); ++I) { + PHINode *Phi = cast(I); + assert(V2VMap[Phi]); + PHINode *PhiCopy = cast(V2VMap[Phi]); + // As a copy block does not have incoming blocks yet all its phi-functions should empty. + PHINode *EmptyPhi = PHINode::Create(PhiCopy->getType(), 0, PhiCopy->getName(), PhiCopy); + PhiCopy->replaceAllUsesWith(EmptyPhi); + V2VMap[&*I] = EmptyPhi; + PhiCopy->eraseFromParent(); + PhiCopy = EmptyPhi; + + for (unsigned Id = 0; Id < Phi->getNumIncomingValues(); ) { + if (Phi->getIncomingBlock(Id) != PredToRedirect) { + ++Id; + continue; + } + + Value *V = Phi->getIncomingValue(Id); + PhiCopy->addIncoming(V, PredToRedirect); + Phi->removeIncomingValue(Id, false); + } + assert(Phi->getBasicBlockIndex(PredToRedirect) == -1); + } + + // correct instructions in OriginalBlock and BlockCopy + for (auto I = OriginalBlock->begin(), E = OriginalBlock->end(); I != E; ++I) { + if (I->use_empty()) + continue; + + assert(isa(V2VMap[&*I])); + Instruction *ICopy = cast(V2VMap[&*I]); + + SmallPtrSet UsersToUpdate; + for (auto UseIt = I->use_begin(), UseE = I->use_end(); UseIt != UseE; ++UseIt) { + if (Instruction *IUser = dyn_cast(UseIt->getUser())) { + if (IUser->getParent() == OriginalBlock) + continue; // don't need to updates users inside the original block + + if (IUser->getParent() == BlockCopy) { + // uses inside the block copy must updated to uses of the instruction copy + UsersToUpdate.insert(IUser); + continue; + } + } + } + for (auto User: UsersToUpdate) + User->replaceUsesOfWith(&*I, ICopy); + } + + // Update phi-functions of OriginalBlock successors to take into account incoming values from BlockCopy + SmallPtrSet SeenSuccessors; + for (auto SuccIt = succ_begin(OriginalBlock), SuccItE = succ_end(OriginalBlock); + SuccIt != SuccItE; ++SuccIt) { + if (!SeenSuccessors.insert(*SuccIt).second) + continue; + for (auto I = SuccIt->begin(); isa(I); ++I) { + PHINode *Phi = cast(I); + Value *V = nullptr; + int VCount = 0; + for (unsigned Id = 0, IdE = Phi->getNumIncomingValues(); Id != IdE; ++Id) { + if (Phi->getIncomingBlock(Id) != OriginalBlock) + continue; + + V = Phi->getIncomingValue(Id); + ++VCount; + } + + assert(V); + if (Instruction *Inst = dyn_cast(V)) { + Value *InstCopy = V2VMap[Inst]; + if (InstCopy) + V = InstCopy; + } + + for (; VCount > 0; --VCount) + Phi->addIncoming(V, BlockCopy); + } + } +} + +/// \brief Change the successor of the specified block to successor copy and correct +/// phi-functions. +/// +/// \param[in,out] BB the basic block which successor is changed +/// \param[in,out] Succ the current successor of BB +/// \param[in,out] SuccCopy the copy of the current successor Succ which becomes a successor instead of Succ +/// \param[in] V2VMap mapping between instructions from Succ and their copies from SuccCopy +void AJT::changeBlockSuccToNewSuccCopy(BasicBlock *BB, + BasicBlock *Succ, + BasicBlock *SuccCopy, + ValueToValueMapTy &V2VMap) { + assert(BB); + assert(Succ); + assert(SuccCopy); + assert(std::find(succ_begin(BB), succ_end(BB), SuccCopy) == succ_end(BB)); + assert(std::find(succ_begin(BB), succ_end(BB), Succ) != succ_end(BB)); + assert(std::find(succ_begin(Succ), succ_end(Succ), SuccCopy) == + succ_end(Succ)); + + correctInstructionsAfterCopying(Succ, SuccCopy, BB, V2VMap); + changeteTerminatorTarget(BB->getTerminator(), Succ, SuccCopy); + + // correct SSA form + SSAUpdater SSAUp; + SmallVector UsesToRewrite; + for (auto I = Succ->begin(), E = Succ->end(); I != E; ++I) { + for (Use &U : I->uses()) { + Instruction *User = cast(U.getUser()); + assert(User->getParent() != SuccCopy); + if (PHINode *UserPN = dyn_cast(User)) { + if (UserPN->getIncomingBlock(U) == Succ) + continue; + } else if (User->getParent() == Succ) { + continue; + } + + UsesToRewrite.push_back(&U); + } + + if (UsesToRewrite.empty()) + continue; + + assert(V2VMap[&*I]); + assert(isa(V2VMap[&*I])); + Instruction *ICopy = cast(V2VMap[&*I]); + SSAUp.Initialize(I->getType(), I->getName()); + SSAUp.AddAvailableValue(Succ, &*I); // I comes form Succ + SSAUp.AddAvailableValue(SuccCopy, ICopy); // ICopy comes form SuccCopy + while (!UsesToRewrite.empty()) + SSAUp.RewriteUse(*UsesToRewrite.pop_back_val()); + } +} + +// Recursively check that there is a data flow path from phi-function Phi1 to phi-function Phi2 +static bool isPhiConnectedToPhiRec(PHINode *Phi1, PHINode *Phi2, SmallPtrSetImpl &SeenPhis) { + assert(Phi1); + assert(Phi2); + if (Phi2 == Phi1) + return true; //reached + + if (!SeenPhis.insert(Phi2).second) + return false; // Cannot reach Phi1 via this way + + for (Use &U: Phi2->operands()) { + if (PHINode *Phi = dyn_cast(U.get())) { + if (isPhiConnectedToPhiRec(Phi1, Phi, SeenPhis)) + return true; + } + } + + return false; +} + +// Check if phi-function Phi1 is connected to phi-function Phi2 +// (there is a data flow path from Phi1 to Phi2) via phi-function Phi3 +static bool isPhiConnectedToPhiViaPhi(PHINode *Phi1, PHINode *Phi2, PHINode *Phi3) { + SmallPtrSet SeenPhis; + SeenPhis.insert(Phi2); // marked Phi2 as seen not to have loop + return isPhiConnectedToPhiRec(Phi1, Phi3, SeenPhis); +} + +// For phi-function PhiValue find a basic block from which the value defined by PhiValue comes to PhiUser. +// Note: Phi1 can be in a block which is not immediate predecessor of the parent block of Phi2. +// Note: There must be only incoming block into Phi1 for Phi2. +static BasicBlock *findBlockIncomingIntoPhiWithValueOfPhi(PHINode* PhiUser, PHINode* PhiValue) { + // there should be only path from PhiValue to PredUser + assert(std::count_if(PhiUser->op_begin(), PhiUser->op_end(), + [PhiUser, PhiValue](Use& U){ + return isa(U.get()) + && isPhiConnectedToPhiViaPhi(PhiValue, PhiUser, cast(U.get())); + }) == 1); + + for (Use &U: PhiUser->operands()) { + if (PHINode *Phi = dyn_cast(U.get())) { + // There might be new created phi-functions between PhiUser and PredValue + if (isPhiConnectedToPhiViaPhi(PhiValue, PhiUser, Phi)) + return PhiUser->getIncomingBlock(U); + } + } + return nullptr; +} + +/// \brief Collect all basic blocks between basic blocks of connected phi-functions. +template +bool collectBlocksBetweenConnectedPhis(OutputIt OI, PHINode *PredPhi, + PHINode *Phi) { + assert(PredPhi); + assert(Phi); + assert(PredPhi != Phi); + assert(PredPhi->getParent() != Phi->getParent()); + + // Go down and add all unique successors. + // We can reach a block which does not have an unique successor and is not + // a parent block of Phi. The previous seen block is stored into PrevB. + BasicBlock *StoppedBlock = nullptr; + for (BasicBlock *B = PredPhi->getParent()->getUniqueSuccessor(), *E = Phi->getParent(); + B; B = B->getUniqueSuccessor()) { + if (B == E) + return true; // Reached a parent block of Phi. All blocks are added. + OI = B; + ++OI; + StoppedBlock = B; + } + + if (!StoppedBlock) { + assert(!ReportTODO && "implement copying CF: diamond/triangle control flow"); + return false; + } + + SmallVector BBlocks; + // Go up and add all unique predecessors + for (BasicBlock *B = findBlockIncomingIntoPhiWithValueOfPhi(Phi, PredPhi); + B != StoppedBlock; B = B->getUniquePredecessor()) { + if (!B) { + assert(!ReportTODO && "implement copying CF: diamond/triangle control flow"); + return false; + } + BBlocks.push_back(B); + } + std::copy(BBlocks.rbegin(), BBlocks.rend(), OI); + return true; +} + +// There are blocks between connected phi-functions when a basic block of one phi-function +// is not an incoming basic block of another phi-function. +static bool areThereBlocksBetweenConnectedPhis(PHINode *PredPhi, PHINode *Phi) { + if (!PredPhi) + return false; + + assert(Phi); + + return Phi->getBasicBlockIndex(PredPhi->getParent()) == -1; +} + +/// \brief Create a list of blocks which need to be copied to preserve correctness of CF +/// after redirecting CF. +/// +/// \param[out] OI output iterator which is an interface to storage where to put blocks +/// \param[in] DataFlowStartPoint This is a list of phi-function which the constant goes through. +/// \param[in] FinalBlock a final block which the constant comes in. +template +void createListOfBlocksToCopy(OutputIt OI, PhiDataFlowPoint *DataFlowStartPoint, + BasicBlock *FinalBlock) { + assert(DataFlowStartPoint); + assert(DataFlowStartPoint->getPhi()); + assert(FinalBlock); + + PHINode *Phi = nullptr; // current phi-function + PHINode *PrevPhi = nullptr; // previous phi-function + PhiDataFlowPoint *DataFlowPoint = DataFlowStartPoint; + while (DataFlowPoint && DataFlowPoint->getPhi()->getParent() != FinalBlock) { + PrevPhi = Phi; + Phi = DataFlowPoint->getPhi(); + // there might be blocks between the basic block of PrevPhi and the basic + // block of the current Phi. They also must be out on the list. + if (areThereBlocksBetweenConnectedPhis(PrevPhi, Phi)) { + if (!collectBlocksBetweenConnectedPhis(OI, PrevPhi, Phi)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Unsupported CF between phi-functions: " + << PrevPhi->getName() << " and " << Phi->getName() << '\n'); + return; + } + } + + OI = Phi->getParent(); + ++OI; + DataFlowPoint = DataFlowPoint->getNextPoint(); + } + + if (DataFlowPoint) { // we reached FinalBlock + PrevPhi = Phi; + Phi = DataFlowPoint->getPhi(); + // add all blocks between basic blocks of phi-functions + if (areThereBlocksBetweenConnectedPhis(PrevPhi, Phi)) { + if (!collectBlocksBetweenConnectedPhis(OI, PrevPhi, Phi)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Unsupported CF between phi-functions: " + << PrevPhi->getName() << " and " << Phi->getName() << '\n'); + return; + } + } + return; + } + + assert(Phi); + + // we need to add all block between FinalBlock and the basic block of the last + // phi-function. As they don't have phi-functions they are unique predecessors + // of each another. + SmallVector BBlocks; + for (BasicBlock *Pred = FinalBlock->getUniquePredecessor(); + Pred && Pred != Phi->getParent(); Pred = Pred->getUniquePredecessor()) { + BBlocks.push_back(Pred); + } + if (!BBlocks.empty() && !BBlocks.back()->getUniquePredecessor()) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Unsupported CF: " + << BBlocks.back() << " does not have an unique predecessor.\n"); + return; + } + std::copy(BBlocks.rbegin(), BBlocks.rend(), OI); +} + +/// Count executable instructions in the specified basic block. +static int countExecutableInstructionsInBlock(BasicBlock *B) { + assert(B); + int Count = 0; + for (auto I = B->begin(), E = B->end(); I != E; ++I) { + if (isa(I)) + continue; + // Debugger intrinsics don't incur code size. + if (isa(I)) + continue; + + // If this is a pointer->pointer bitcast, it is free. + if (isa(I) && I->getType()->isPointerTy()) + continue; + + ++Count; + } + return Count; +} + +// Define max allowed percent of instructions (comparing to instructions +// from original blocks) to be copied. +// TODO: Temporarily set 80% as it needs to be tuned. +static const int MaxPercentInstructionsToBeCopied = 80; + +/// Assess if amount of instruction to be copied in not big. +bool AJT::isAmountOfInstructionsToCopyBig(SmallVectorImpl& BlocksToCopy) { + assert(!OriginalBlocks.empty()); // We always make copies from original blocks. Why is the set empty? + assert(!BlocksToCopy.empty()); // No blocks to copy. Strange. + assert(!ReportTODO && "Tune parameter: MaxPercentInstructionsToBeCopied"); + int OriginalBlocksExeInstrCount = 0; + for (auto BB: OriginalBlocks) { + // Check BB is not in DeletedBlocksBin + if (isBlockDeleted(BB)) + continue; + OriginalBlocksExeInstrCount += countExecutableInstructionsInBlock(BB); + } + + int CopiedBlocksExeInstrCount = 0; + for (auto BB: BlocksToCopy) { + CopiedBlocksExeInstrCount += countExecutableInstructionsInBlock(BB); + } + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << "" + << " Original blocks instructions count " << OriginalBlocksExeInstrCount + << ". Copied blocks instructions count " << CopiedBlocksExeInstrCount + << ". %: " << ((CopiedBlocksExeInstrCount*100) / OriginalBlocksExeInstrCount) + << ". Max % allowed: " << MaxPercentInstructionsToBeCopied + << '\n'); + + return (CopiedBlocksExeInstrCount*100) / OriginalBlocksExeInstrCount > MaxPercentInstructionsToBeCopied; +} + +/// \brief Redirect CF path from Block to the block(BlockSucc) which is Block's +/// successor for incoming constant C. +/// +/// As the block(ConstSourceBlock) which is a source of constant C might not be +/// an immediate predecessor of Block all blocks between ConstSourceBlock and Block +/// must be copied. +/// +/// \param[in] Block This is a basic block from which CF is redirected to its successor. +/// \param[in] BlockSucc This is a block which is jumped to when an incoming constant is C. +/// \param[in] C The constant for which CF path is redirected. +/// \param[in] ConstSourceBlock The block which is a source of the constant. +/// \param[in] DataFlowStartPoint This is a list of phi-function which the constant goes through. +/// It's used to create a list of blocks to be copied. +/// +/// \returns true if CF path has been redirected. +bool AJT::redirectCFPathFromBlockToItsSuccForConst(BasicBlock *Block, + BasicBlock *BlockSucc, + ConstantInt *C, + BasicBlock *ConstSourceBlock, + PhiDataFlowPoint *DataFlowStartPoint) { + assert(C); + assert(ConstSourceBlock); + assert(DataFlowStartPoint); + assert(DataFlowStartPoint->getPhi()); + assert(Block); + assert(BlockSucc); + + // from the previous runs it's known that redirecting this constant does not improve CF + if (SkipConstantSources.count(ConstSourceBlock)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix << " Skipped: constant '" << *C + << "' from BB " << ConstSourceBlock->getName() << '\n'); + return false; + } + + // The goal is to optimise the original CF. + // Blocks sources of constants are not copied. So if we meet a block source of + // constant which is not original. It is a result of simplifying of CF: merging empty blocks. + // We will copy blocks which might be empty. After simplifying we will have the similar CF as before. + // Then optimising this again the third copy of the same CF will be created and so on. + // We have fallen into an infinitive copy cycle. + if (!OriginalBlocks.count(ConstSourceBlock)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix << " Copy cycle " + "detected. Skipped: constant '" << *C + << "' from BB " << ConstSourceBlock->getName() << '\n'); + SkipConstantSources.insert(ConstSourceBlock); // skip this constant in the future + return false; + } + + SmallVector BlocksToCopy; + createListOfBlocksToCopy(std::back_inserter(BlocksToCopy), DataFlowStartPoint, + Block); + + // Another possible infinitive copy cycle: + // Path of constant might look like: + // ConstSourceBlock -> BlockSucc -> SomeBlock -> Block -> BlockSucc + // Alternative path will be created: + // ConstSourceBlock -> BlockSucc(copy) -> SomeBlock(copy) -> BlockSucc + // 'ConstSourceBlock -> BlockSucc(copy) -> SomeBlock(copy)' might be merged into one block: + // ConstSourceBlock_new_merged_block + // So there will be the same CF: + // ConstSourceBlock_new_merged_block -> BlockSucc -> SomeBlock -> Block + if (std::find(BlocksToCopy.begin(), BlocksToCopy.end(), BlockSucc) != + BlocksToCopy.end()) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix << " Copy cycle " + "detected. Skipped: constant '" << *C + << "' from BB " << ConstSourceBlock->getName() << '\n'); + SkipConstantSources.insert(ConstSourceBlock); + return false; + } + + // Check if all blocks can be copied. + if (std::any_of( + BlocksToCopy.begin(), BlocksToCopy.end(), + [this](BasicBlock *B) { return !this->isBlockAllowedToCopy(*B); })) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Non-copyable node detected. " + "Skipped: constant '" << *C << "' from BB " + << ConstSourceBlock->getName() << '\n'); + return false; + } + + // Check if a path to the final Block is constructed. + // Only simple paths (without diamond-like structures etc.) are supported. + // TODO: support diamonds. + if (BlocksToCopy.empty() + || std::find(succ_begin(BlocksToCopy.back()), succ_end(BlocksToCopy.back()), Block) + == succ_end(BlocksToCopy.back())) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Complicated path detected. " + "Skipped: constant '" << *C << "' from BB " + << ConstSourceBlock->getName() << '\n'); + return false; + } + + if (isAmountOfInstructionsToCopyBig(BlocksToCopy)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Amount of instructions being copied is big. " + "Skipped: constant '" << *C << "' from BB " + << ConstSourceBlock->getName() << '\n'); + return false; + } + + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Copying blocks on path from " + << ConstSourceBlock->getName() << "\n"); + // Copy blocks + BasicBlock *BlockToRedirect = ConstSourceBlock; + for (auto BB : BlocksToCopy) { + // if a basic block has an unique predecessor there no other CF paths through it. + // So we don't need to copy it. + if (BB->getUniquePredecessor()) { + BlockToRedirect = BB; + continue; + } + + ValueToValueMapTy V2VMap; + BasicBlock *BBCopy = CloneBasicBlock( + BB, V2VMap, ".c" + Twine(++BasicBlockCopiesCounter), BB->getParent()); + changeBlockSuccToNewSuccCopy(BlockToRedirect, BB, BBCopy, V2VMap); + // IR must be valid after our transformations. + assert(verifyFunction(*(BlockSucc->getParent()), &dbgs()) == false); + + BlockToRedirect = BBCopy; + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Copied " << BB->getName() + << " as " << BBCopy->getName() << "\n"); + } + + // finally redirect CF from the specified block to its successor for constant C + changeBlockSuccToSuccOfSucc(BlockToRedirect, Block, BlockSucc); + assert(isBlockUnreachable(Block) || verifyFunction(*(BlockSucc->getParent()), &dbgs()) == false); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Redirected CF for " << BlockToRedirect->getName() + << " from " << Block->getName() + << " to " << BlockSucc->getName() << "\n"); + return true; +} + +/// \brief Unfold a select instruction. +/// The select is converted into the following: +/// +/// Select Owner -- +/// | v +/// | NewBB +/// | | +/// |------------ +/// v +/// BB +/// +/// \returns phi-function which was an user of the select instruction or has been created. +/// nullptr is returned if the select instruction can not be unfolded for some reason. +PHINode *AJT::tryToUnfoldSelectInstruction(SelectInst &SI) { + BasicBlock *BB = SI.getParent(); + assert(BB); + + // the select defined value should not be used in the basic block + assert(!ReportTODO && "Implement unfolding a select instruction for the case: when there are users of it inside its basic block."); + if (SI.isUsedInBasicBlock(BB)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Skipped select instruction '" + << SI.getName() << "' because it is used inside its basic block.\n"); + return nullptr; + } + + // only IF-like basic blocks are supported + assert(!ReportTODO && "Implement unfolding a select instruction for the case: Terminator is not BranchInst."); + if (!isa(BB->getTerminator())) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Skipped select instruction '" + << SI.getName() << "' because its block Terminator is BranchInst.\n"); + return nullptr; + } + + BranchInst *BI = cast(BB->getTerminator()); + + // only unconditional branches are supported + assert(!ReportTODO && "Implement unfolding a select instruction for the case: BranchInst is conditional."); + if (BI->isConditional()) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Skipped select instruction '" + << SI.getName() << "' because its block BranchInst is conditional.\n"); + return nullptr; + } + + // there should be no other instructions between the select and the terminator. + // TODO: implement unfolding for such cases if they exist + assert(!ReportTODO && "Implement unfolding a select instruction for the case: there are instructions between the select instruction and the terminator."); + if (&(*(++BB->rbegin())) != &SI) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Skipped select instruction '" + << SI.getName() << "' because there are instructions between the select instruction and the terminator.\n"); + return nullptr; + } + + assert(BI->getNumSuccessors() == 1); + BasicBlock *BBSucc = BI->getSuccessor(0); + BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "select.unfold", + BB->getParent(), BBSucc); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " New basic block created '" + << NewBB->getName() << '\n'); + // Add the new block to the set of original blocks if the select's block is in it. + if (OriginalBlocks.count(BB)) + UnfoldedOriginalBlocks.push_back(NewBB); + + // Move unconditional jump to the new created block + BI->removeFromParent(); + NewBB->getInstList().push_back(BI); + // Add conditional jump which uses select condition + BranchInst::Create(BBSucc, NewBB, SI.getCondition(), BB); + + // If in the successor the select is used in a phi-function. + // The phi-function will have two values and needs to be updated. + PHINode *Phi = nullptr; + for (User *U : SI.users()) { + if (PHINode *P = dyn_cast(U)) { + if (P->getParent() == BBSucc) { + Phi = P; + break; + } + } + } + + if (Phi) { + // remove incoming select value + Phi->removeIncomingValue(BB, false); + } else { + // no phi-functions have been found, create a new one + Phi = PHINode::Create(SI.getType(), 2, "", &BBSucc->front()); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " New phi-function created '" + << Phi->getName() << '\n'); + SI.replaceAllUsesWith(Phi); + } + + Phi->addIncoming(SI.getTrueValue(), BB); + Phi->addIncoming(SI.getFalseValue(), NewBB); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Deleted select instruction '" + << SI.getName() << '\n'); + SI.eraseFromParent(); + + // The successor might have other phi-functions. + // They need to be updated that the same value comes from the new created block. + for (auto PhiI = BBSucc->begin(); PHINode *P = dyn_cast(PhiI); + ++PhiI) { + if (P != Phi) + P->addIncoming(P->getIncomingValueForBlock(BB), NewBB); + } + return Phi; +} + +/// \brief Try to recursively unfold select instructions which are sources of +/// constants to the specified phi-function. +/// +/// \param[in] Phi phi-function to start searching select instructions from +/// \param[in,out] SeenPhis phi-functions which have been seen +/// \param[out] SelectInsts select insructions to unfold +/// +/// \returns true if select instructions have been unfolded. +void AJT::findSelectInstructionsToUnfoldRec( + PHINode *Phi, SmallPtrSetImpl &SeenPhis, + SetVector &SelectInsts) { + assert(Phi); + + if (!SeenPhis.insert(Phi).second) + return; + + // Phi-function might have more than one incoming edge from the same block. + // Keep history which incoming blocks have been seen. + SmallPtrSet SeenBlocks; + + // Analyse incoming values whether they might be constant. + // If an incoming value is an instruction selecting constants + // it is unfolded. + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { + if (!SeenBlocks.insert(Phi->getIncomingBlock(I)).second) + continue; + + Value *InValue = Phi->getIncomingValue(I); + if (PHINode *P = dyn_cast(InValue)) { + findSelectInstructionsToUnfoldRec(P, SeenPhis, SelectInsts); + } else if (SelectInst *SI = dyn_cast(InValue)) { + if (!isa(SI->getTrueValue()) + || !isa(SI->getFalseValue())) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << "Skip select instruction '" + << SI->getName() << "' because one of operands or both are not constant.\n"); + continue; + } + SelectInsts.insert(SI); + } + } +} + +/// \brief Try to unfold select instructions which are sources of constants to +/// the specified phi-function. +/// +/// \returns true if select instructions have been unfolded. +bool AJT::tryToUnfoldSelectInstructions(PHINode &StartPhi) { + SmallPtrSet SeenPhis; + SetVector SelectInsts; + findSelectInstructionsToUnfoldRec(&StartPhi, SeenPhis, SelectInsts); + bool Changed = false; + for (auto SI: SelectInsts) + if (tryToUnfoldSelectInstruction(*SI)) + Changed |= true; + return Changed; +} + +/// \brief Collect all constants which come in the terminator's basic block and +/// which the terminator depends on. +void AJT::TerminatorContext::collectIncomingConstants() { + SmallPtrSet SeenPhis; + collectConstants(StartingPoint, SeenPhis, IncomingConstants, DataFlowPoints, nullptr); +} + +/// \brief Try to create alternative control flow paths when there are a set of +/// current block incoming constants for which there are known terminator's successors. +/// +/// There are different types of terminators (e.g. SwitchInst, BranchInst). +/// A set of incoming constants and corresponding successors depend on terminator's +/// context. +/// +/// \returns true if alternative CF paths have been created or there have been any changes in CF. +bool AJT::createAlternativeCFPath(TerminatorContext &TerminatorCtxt) { + // TODO: Maybe we can teach TerminatorContext::collectIncomingConstants to collect + // constants without unfolding select instructions. + bool Changed = tryToUnfoldSelectInstructions(*TerminatorCtxt.getStartingPoint()); + + TerminatorCtxt.collectIncomingConstants(); + if (TerminatorCtxt.getIncomingConstants().empty()) + return Changed; + + BasicBlock *TerminatorBB = TerminatorCtxt.getTerminator()->getParent(); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << "Attempting to create alternative path to " + << TerminatorBB->getName() << '\n'); + assert(TerminatorBB); + + LLVM_DEBUG( + dbgs() << AJTDbgMsgPrefix << "Incoming constants: "; + for (auto IC : TerminatorCtxt.getIncomingConstants()) { + dbgs() << " { " << std::get<0>(IC)->getValue() + << ", " << std::get<1>(IC)->getName() + << " }"; + } + dbgs() << "\n"; + ); + + for (auto IC : TerminatorCtxt.getIncomingConstants()) { + // IC is tuple of + ConstantInt *C = std::get<0>(IC); + assert(C); + + // Get a basic block to which CF jumps in case of an incoming constant C. + BasicBlock *TakenBB = TerminatorCtxt.getTerminatorSuccForConstant(C); + assert(TakenBB); + + // The first phi-function which the constant comes in. + PhiDataFlowPoint *DataFlowStartPoint = std::get<2>(IC); + assert(DataFlowStartPoint); + if (redirectCFPathFromBlockToItsSuccForConst(TerminatorBB, TakenBB, C, std::get<1>(IC), DataFlowStartPoint)) { + Changed = true; + if (isBlockUnreachable(TerminatorBB)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Deleting unreachable block: " + << TerminatorBB->getName() << '\n'); + deleteDeadBlock(TerminatorBB); + assert(verifyFunction(*(TakenBB->getParent()), &dbgs()) == false); // verify IR + break; // the terminator's basic block has been deleted there is no need to create other alternative paths. + } + } + } + + return Changed; +} + +/// \brief Check if the specified basic block is kind of blocks supported by AJT. +/// +/// \returns true if: +/// 1. The block does not have non-phi instructions which are +/// either calls or memory operations or define values used +/// outside the block. As the block is going to be jumped over +/// they won't be executed and there will incorrect data flow. +/// (TODO: split a basic block to have them in one part and a terminator in another part). +/// 2. And the block's terminator is a switch. +bool AJT::isSupported(const BasicBlock &B) { + for (const auto &Inst: B.getInstList()) { + if (!isa(&Inst) + && (isa(&Inst) + || isa(&Inst) + || Inst.mayReadOrWriteMemory() + || Inst.isUsedOutsideOfBlock(&B) + || (!Inst.use_empty() && !Inst.isUsedInBasicBlock(&B)))) { + assert(!ReportTODO && "Implement splitting basic blocks."); + return false; + } + } + + for (auto Succ = succ_begin(&B), SuccE = succ_end(&B); Succ != SuccE; ++Succ) { + for (auto I = Succ->begin(); isa(I); ++I) { + const PHINode *Phi = cast(I); + assert(Phi->getBasicBlockIndex(&B) != -1); + if (isa(Phi->getIncomingValueForBlock(&B))) { + return false; + } + } + } + + const Instruction *Terminator = B.getTerminator(); + if (isa(Terminator)) + return true; + + return false; +} + +/// \brief Apply the jump threading optimisation to the specified basic block. +/// If the optimisation can not be applied or there is no profit from it +/// the basic block is put on the skip list. +/// +/// \returns true if changes have been made: CF edge(s) redirected, BB deleted/created. +bool AJT::applyAJT(BasicBlock &BB) { + if (!isSupported(BB)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Skipped unsupported block: " << BB.getName() << '\n'); + return false; + } + + bool Changed = false; + Instruction *Terminator = BB.getTerminator(); + if (SwitchInst *SI = dyn_cast(Terminator)) { + if (PHINode *StartingPoint = dyn_cast(SI->getCondition())) { + SwitchInstContext Ctxt(StartingPoint, SI); + Changed = createAlternativeCFPath(Ctxt); + } + } else { + assert(0 && "Should not reach here"); + } + + return Changed; +} + +/// \brief Initialise AJT. +/// +/// It should be done before processing function CFG. +void AJT::init(Function &F) { + UnfoldedOriginalBlocks.clear(); + DeletedBlocksBin.clear(); + OriginalBlocks.clear(); + SkipConstantSources.clear(); +} + +/// \brief Create a set of pointers to original blocks. +/// +/// All unreachable blocks are removed before creating the set. +void AJT::createOriginalBlocksSet(Function &F) { + removeUnreachableBlocks(F); + auto I = F.begin(); + ++I; + for (auto E = F.end(); I != E; ++I) { + BasicBlock &BB = *I; + OriginalBlocks.insert(&BB); + } +} + +/// Apply constant fold optimisation to all instructions of the specified basic block. +static void constantFoldInstructionsInBlock(BasicBlock &BB, + const TargetLibraryInfo *TLI) { + const DataLayout &DL = BB.getModule()->getDataLayout(); + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto CurrI = I; + ++I; + if (Instruction *Inst = dyn_cast(CurrI)) { + Value *SimpleVal = ConstantFoldInstruction(Inst, DL, TLI); + if (SimpleVal) { + Inst->replaceAllUsesWith(SimpleVal); + Inst->eraseFromParent(); + } + } + } +} + +/// \brief Simplify function CFG. After applying the jump threading the function +/// CFG can have oportunities for optimisations: unreachable blocks, constant folding, +/// changing terminator type from conditional to unconditional, merging blocks. +/// +/// 1. Delete unreachable basic blocks which don't have predecessors. +/// 2. Apply constant folding to instructions in basic blocks. +/// 3. Apply constant folding to a basic block terminator. It can convert +/// conditional jumps into unconditional jumps. +/// 4. If two basic blocks are only connected they are merged onto one. The first +/// one (predecessor) is deleted. If two blocks make a loop they are not merged. +void AJT::simplifyCFG(Function& F) { + assert(!F.empty()); + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Applying constant folding to instructions of the entry block: " + << F.begin()->getName() << '\n'); + constantFoldInstructionsInBlock(*F.begin(), TLI); + + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Applying constant folding to terminator of the entry block: " + << F.begin()->getName() << '\n'); + ConstantFoldTerminator(&F.front(), true, TLI); + auto I = F.begin(); + ++I; + for (auto E = F.end(); I != E;) { + BasicBlock &BB = *I; + ++I; + + if (isBlockUnreachable(&BB)) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Deleting unreachable block: " + << BB.getName() << '\n'); + deleteDeadBlock(&BB); + continue; + } + + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Applying constant folding to instructions of : " + << BB.getName() << '\n'); + constantFoldInstructionsInBlock(BB, TLI); + + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Applying constant folding to terminator of : " + << BB.getName() << '\n'); + ConstantFoldTerminator(&BB, true, TLI); + + // Check there is no loop with the next block. + if (isPredecessorSinglyConnectedTo(BB) + && I != E && BB.getSinglePredecessor() != &*I) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix + << " Merging block with predecessor: " + << BB.getName() << '\n'); + mergeBlockWithSCPred(BB); + if (&F.getEntryBlock() == &BB) + removeFromOriginalBlocks(&BB); + } + } +} + +/// \brief Optimise control flow of the specified functions by jumping over +/// basic block when it is possible. +/// +/// It works iteratively. If there have been control flow changes +/// during processing of a basic block then the process is restarted as +/// there might be new oportunities for optimisation. +/// +/// Some basic block might be skipped because they should not be touched or +/// there is no performance gain from their optimisation or there are no +/// control flow paths to be optimised. +/// +/// \returns true on changes of \p F +bool AJT::run(Function &F) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix << "Processing function '" << F.getName() + << "'\n"); + init(F); + createOriginalBlocksSet(F); + + bool Changed = false; + bool Rerun; + do { + Rerun = false; + assert(UnfoldedOriginalBlocks.empty()); + assert(DeletedBlocksBin.empty()); + for (auto BB: OriginalBlocks) { + assert(&F.getEntryBlock() != BB); + assert(BB); + Rerun |= applyAJT(*BB); + } + Changed |= Rerun; + if (Rerun) { + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix << "Function '" << F.getName() + << "' CF changed. Rerun pass.\n"); + OriginalBlocks.insert(UnfoldedOriginalBlocks.begin(), UnfoldedOriginalBlocks.end()); + UnfoldedOriginalBlocks.clear(); + simplifyCFG(F); + eraseDeletedBlocks(); + } + } while (Rerun); + + LLVM_DEBUG(dbgs() << AJTDbgMsgPrefix << "Completed function '" << F.getName() + << "'\n"); + return Changed; +} + +//===----------------------------------------------------------------------===// +// +// Pass Manager integration code +// +//===----------------------------------------------------------------------===// +PreservedAnalyses AJTPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &SE = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &AC = AM.getResult(F); + auto &TLI = AM.getResult(F); + auto &TTI = AM.getResult(F); + + bool Changed = false; + for (auto &L : LI) { + Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, false /* PreserveLCSSA */); + Changed |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + Changed |= AJT(&TLI, &TTI).run(F); + + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_component_library(LLVMScalarOpts ADCE.cpp + AJT.cpp AlignmentFromAssumptions.cpp BDCE.cpp CallSiteSplitting.cpp diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp --- a/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -59,6 +59,7 @@ initializeInferAddressSpacesPass(Registry); initializeInstSimplifyLegacyPassPass(Registry); initializeJumpThreadingPass(Registry); + initializeAJTLegacyPassPass(Registry); initializeLegacyLICMPassPass(Registry); initializeLegacyLoopSinkPassPass(Registry); initializeLoopFuseLegacyPass(Registry); diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -249,6 +249,8 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O2-NEXT: Running pass: SLPVectorizerPass ; CHECK-O3-NEXT: Running pass: SLPVectorizerPass +; CHECK-O3-NEXT: Running pass: AJTPass +; CHECK-O3-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: VectorCombinePass ; CHECK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -219,6 +219,8 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-POSTLINK-O2-NEXT: Running pass: SLPVectorizerPass ; CHECK-POSTLINK-O3-NEXT: Running pass: SLPVectorizerPass +; CHECK-POSTLINK-O3-NEXT: Running pass: AJTPass +; CHECK-POSTLINK-O3-NEXT: Running pass: CorrelatedValuePropagationPass ; CHECK-POSTLINK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-POSTLINK-O-NEXT: Running pass: VectorCombinePass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -189,6 +189,8 @@ ; CHECK-O3-NEXT: Running pass: SLPVectorizerPass ; CHECK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: VectorCombinePass +; CHECK-O3-NEXT: AJTPass on foo +; CHECK-O3-NEXT: CorrelatedValuePropagationPass on foo ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -200,6 +200,8 @@ ; CHECK-O3-NEXT: Running pass: SLPVectorizerPass ; CHECK-Os-NEXT: Running pass: SLPVectorizerPass ; CHECK-O-NEXT: Running pass: VectorCombinePass +; CHECK-O3-NEXT: AJTPass +; CHECK-O3-NEXT: CorrelatedValuePropagationPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -261,6 +261,17 @@ ; CHECK-NEXT: Inject TLI Mappings ; CHECK-NEXT: SLP Vectorizer ; CHECK-NEXT: Optimize scalar/vector ops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Aggressive Jump Threading +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Lazy Value Information Analysis +; CHECK-NEXT: Value Propagation +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Natural Loop Information +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: Canonicalize natural loops diff --git a/llvm/test/Transforms/AJT/sel_constants.ll b/llvm/test/Transforms/AJT/sel_constants.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/AJT/sel_constants.ll @@ -0,0 +1,162 @@ +; RUN: opt < %s -ajt -S | FileCheck %s + +; Check unfolding of select instructions. +; +; '%state.13' is a phi-function using values from select instructions. +; It also has incoming constants and values from other phi-functions. +; Select instructions '%.' and '%.1' must be found and unfolded. +; A select instruction '%.2' must not be unfolded because one operand is not +; a constant. + +; CHECK: select i1 %v16 +; CHECK: select.unfold: +; CHECK: select.unfold1: + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-arm-none-eabi" + +; Function Attrs: norecurse nounwind +define hidden arm_aapcs_vfpcc i32 @core_state_transition(i8** nocapture %instr, i32* nocapture %transition_count) local_unnamed_addr #0 { +entry: + %0 = load i8*, i8** %instr, align 4 + %1 = load i8, i8* %0, align 1 + %tobool15 = icmp eq i8 %1, 0 + br i1 %tobool15, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %arrayidx36 = getelementptr inbounds i32, i32* %transition_count, i32 2 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc + %2 = phi i8 [ %1, %for.body.lr.ph ], [ %v18, %for.inc ] + %state.017 = phi i32 [ 0, %for.body.lr.ph ], [ %state.13, %for.inc ] + %str.016 = phi i8* [ %0, %for.body.lr.ph ], [ %incdec.ptr109, %for.inc ] + %cmp3 = icmp eq i8 %2, 44 + br i1 %cmp3, label %if.then, label %if.end + +if.then: ; preds = %for.body + %state.017.lcssa = phi i32 [ %state.017, %for.body ] + %str.016.lcssa = phi i8* [ %str.016, %for.body ] + %incdec.ptr = getelementptr inbounds i8, i8* %str.016.lcssa, i32 1 + br label %for.end + +if.end: ; preds = %for.body + switch i32 %state.017, label %for.inc [ + i32 0, label %sw.bb + i32 2, label %sw.bb25 + i32 4, label %sw.bb43 + i32 5, label %sw.bb58 + i32 3, label %sw.bb77 + i32 6, label %sw.bb92 + i32 7, label %sw.bb102 + ] + +sw.bb: ; preds = %if.end + %c.off.i = add i8 %2, -48 + %3 = icmp ugt i8 %c.off.i, 9 + br i1 %3, label %if.else, label %if.end22 + +if.else: ; preds = %sw.bb + switch i8 %2, label %if.else19 [ + i8 43, label %if.end22 + i8 45, label %if.end22 + i8 46, label %if.end22.fold.split + ] + +if.else19: ; preds = %if.else + br label %if.end22 + +if.end22.fold.split: ; preds = %if.else + br label %if.end22 + +if.end22: ; preds = %if.else, %if.else, %if.end22.fold.split, %if.else19, %sw.bb + %state.3 = phi i32 [ 4, %sw.bb ], [ 2, %if.else ], [ 1, %if.else19 ], [ 2, %if.else ], [ 5, %if.end22.fold.split ] + br label %for.inc + +sw.bb25: ; preds = %if.end + %c.off.i10 = add i8 %2, -48 + %v6 = icmp ugt i8 %c.off.i10, 9 + br i1 %v6, label %if.else31, label %if.then28 + +if.then28: ; preds = %sw.bb25 + br label %for.inc + +if.else31: ; preds = %sw.bb25 + %cmp33 = icmp eq i8 %2, 46 + %v8 = load i32, i32* %arrayidx36, align 4 + %inc37 = add i32 %v8, 1 + store i32 %inc37, i32* %arrayidx36, align 4 + %. = select i1 %cmp33, i32 5, i32 1 + br label %for.inc + +sw.bb43: ; preds = %if.end + %cmp45 = icmp eq i8 %2, 46 + br i1 %cmp45, label %if.then47, label %if.else50 + +if.then47: ; preds = %sw.bb43 + br label %for.inc + +if.else50: ; preds = %sw.bb43 + %c.off.i8 = add i8 %2, -48 + %v10 = icmp ugt i8 %c.off.i8, 9 + br i1 %v10, label %for.inc.thread, label %for.inc + +sw.bb58: ; preds = %if.end + switch i8 %2, label %if.else69 [ + i8 69, label %if.then66 + i8 101, label %if.then66 + ] + +if.then66: ; preds = %sw.bb58, %sw.bb58 + br label %for.inc + +if.else69: ; preds = %sw.bb58 + %c.off.i6 = add i8 %2, -48 + %v12 = icmp ugt i8 %c.off.i6, 9 + br i1 %v12, label %for.inc.thread, label %for.inc + +sw.bb77: ; preds = %if.end + switch i8 %2, label %for.inc.thread [ + i8 43, label %if.then85 + i8 45, label %if.then85 + ] + +if.then85: ; preds = %sw.bb77, %sw.bb77 + br label %for.inc + +sw.bb92: ; preds = %if.end + %c.off.i4 = add i8 %2, -48 + %v14 = icmp ugt i8 %c.off.i4, 9 + %.1 = select i1 %v14, i32 1, i32 7 + br label %for.inc + +sw.bb102: ; preds = %if.end + %c.off.i2 = add i8 %2, -48 + %v16 = icmp ugt i8 %c.off.i2, 9 + %v17 = load i32, i32* %arrayidx36, align 4 + %.2 = select i1 %v16, i32 %v17, i32 7 + br i1 %v16, label %for.inc.thread, label %for.inc + +for.inc.thread: ; preds = %if.else50, %sw.bb102, %sw.bb77, %if.else69 + br label %for.end + +for.inc: ; preds = %sw.bb92, %if.else31, %if.end22, %if.then28, %if.else50, %if.then47, %if.else69, %if.then66, %if.then85, %sw.bb102, %if.end + %state.13 = phi i32 [ %state.3, %if.end22 ], [ 4, %if.then28 ], [ 5, %if.then47 ], [ 4, %if.else50 ], [ 3, %if.then66 ], [ 5, %if.else69 ], [ 6, %if.then85 ], [ %.2, %sw.bb102 ], [ %state.017, %if.end ], [ %., %if.else31 ], [ %.1, %sw.bb92 ] + %incdec.ptr109 = getelementptr inbounds i8, i8* %str.016, i32 1 + %v18 = load i8, i8* %incdec.ptr109, align 1 + %tobool = icmp ne i8 %v18, 0 + %cmp = icmp ne i32 %state.13, 1 + %or.cond = and i1 %cmp, %tobool + br i1 %or.cond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.inc + %state.13.lcssa = phi i32 [ %state.13, %for.inc ] + br label %for.end + +for.end: ; preds = %for.end.loopexit, %for.inc.thread, %entry, %if.then + %state.013 = phi i32 [ %state.017.lcssa, %if.then ], [ 0, %entry ], [ 1, %for.inc.thread ], [ %state.13.lcssa, %for.end.loopexit ] + ret i32 %state.013 +} + +attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m33" "target-features"="+d16,+dsp,+fp-armv8,+fp-only-sp,+hwdiv,+thumb-mode,-crc,-crypto,-dotprod,-fullfp16,-hwdiv-arm,-neon,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" } + diff --git a/llvm/test/Transforms/AJT/switch.no_opt.ll b/llvm/test/Transforms/AJT/switch.no_opt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/AJT/switch.no_opt.ll @@ -0,0 +1,115 @@ +; RUN: opt < %s -ajt -S | FileCheck %s +; RUN: opt < %s -passes="ajt" -S | FileCheck %s + +define void @test(i8* %s, i32* dereferenceable(4) %zeros, i32* dereferenceable(4) %ones) nounwind { +entry: + store i32 0, i32* %zeros, align 4 + store i32 0, i32* %ones, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %state.0 = phi i32 [ -1, %entry ], [ %state.7, %for.inc ] + %s.addr.0 = phi i8* [ %s, %entry ], [ %incdec.ptr, %for.inc ] + %0 = load i8, i8* %s.addr.0, align 1 + %tobool = icmp ne i8 %0, 0 + br i1 %tobool, label %for.body, label %for.end + +for.body: ; preds = %for.cond + switch i32 %state.0, label %for.inc [ + i32 -1, label %sw.bb + i32 0, label %sw.bb7 + i32 1, label %sw.bb20 + i32 2, label %sw.bb33 + ] + +sw.bb: ; preds = %for.body + %1 = load i8, i8* %s.addr.0, align 1 + %conv = sext i8 %1 to i32 + %cmp = icmp eq i32 %conv, 48 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %sw.bb + %2 = load i32, i32* %zeros, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %zeros, align 4 + br label %for.inc + +if.else: ; preds = %sw.bb + %3 = load i8, i8* %s.addr.0, align 1 + %conv1 = sext i8 %3 to i32 + %cmp2 = icmp eq i32 %conv1, 49 + br i1 %cmp2, label %if.then3, label %for.inc + +if.then3: ; preds = %if.else + %4 = load i32, i32* %ones, align 4 + %inc4 = add nsw i32 %4, 1 + store i32 %inc4, i32* %ones, align 4 + br label %for.inc + +sw.bb7: ; preds = %for.body + %5 = load i8, i8* %s.addr.0, align 1 + %conv8 = sext i8 %5 to i32 + %cmp9 = icmp eq i32 %conv8, 48 + br i1 %cmp9, label %if.then10, label %if.else12 + +if.then10: ; preds = %sw.bb7 + %6 = load i32, i32* %zeros, align 4 + %inc11 = add nsw i32 %6, 1 + store i32 %inc11, i32* %zeros, align 4 + br label %for.inc + +if.else12: ; preds = %sw.bb7 + %7 = load i8, i8* %s.addr.0, align 1 + %conv13 = sext i8 %7 to i32 + %cmp14 = icmp eq i32 %conv13, 49 + br i1 %cmp14, label %if.then15, label %for.inc + +if.then15: ; preds = %if.else12 + %8 = load i32, i32* %ones, align 4 + %inc16 = add nsw i32 %8, 1 + store i32 %inc16, i32* %ones, align 4 + br label %for.inc + +sw.bb20: ; preds = %for.body + %9 = load i8, i8* %s.addr.0, align 1 + %conv21 = sext i8 %9 to i32 + %cmp22 = icmp eq i32 %conv21, 48 + br i1 %cmp22, label %if.then23, label %if.else25 + +if.then23: ; preds = %sw.bb20 + %10 = load i32, i32* %zeros, align 4 + %inc24 = add nsw i32 %10, 1 + store i32 %inc24, i32* %zeros, align 4 + br label %for.inc + +if.else25: ; preds = %sw.bb20 + %11 = load i8, i8* %s.addr.0, align 1 + %conv26 = sext i8 %11 to i32 + %cmp27 = icmp eq i32 %conv26, 49 + br i1 %cmp27, label %if.then28, label %for.inc + +if.then28: ; preds = %if.else25 + %12 = load i32, i32* %ones, align 4 + %inc29 = add nsw i32 %12, 1 + store i32 %inc29, i32* %ones, align 4 + br label %for.inc + +sw.bb33: ; preds = %for.body + store i32 -1, i32* %zeros, align 4 + store i32 -1, i32* %ones, align 4 + br label %for.end + +for.inc: ; preds = %for.body, %if.then3, %if.else, %if.then, %if.then15, %if.else12, %if.then10, %if.then28, %if.else25, %if.then23 + %state.7 = phi i32 [ %state.0, %for.body ], [ 0, %if.then ], [ 1, %if.then3 ], [ 2, %if.else ], [ %state.0, %if.then10 ], [ 1, %if.then15 ], [ 2, %if.else12 ], [ 0, %if.then23 ], [ %state.0, %if.then28 ], [ 2, %if.else25 ] + %incdec.ptr = getelementptr inbounds i8, i8* %s.addr.0, i32 1 + br label %for.cond + +for.end: ; preds = %sw.bb33, %for.cond + ret void + +; CHECK: for.cond.c2 +; CHECK: for.cond.c4 +; CHECK: for.cond.c14 + +} + diff --git a/llvm/test/Transforms/AJT/switch.o3.ll b/llvm/test/Transforms/AJT/switch.o3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/AJT/switch.o3.ll @@ -0,0 +1,97 @@ +; RUN: opt < %s -ajt -S | FileCheck %s +; RUN: opt < %s -passes="ajt" -S | FileCheck %s + +define void @test(i8* nocapture readonly %s, i32* nocapture dereferenceable(4) %zeros, i32* nocapture dereferenceable(4) %ones) nounwind { +entry: + store i32 0, i32* %zeros, align 4 + store i32 0, i32* %ones, align 4 + %0 = load i8, i8* %s, align 1 + %tobool16 = icmp eq i8 %0, 0 + br i1 %tobool16, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.inc + %1 = phi i8 [ %8, %for.inc ], [ %0, %entry ] + %state.018 = phi i32 [ %state.1, %for.inc ], [ -1, %entry ] + %s.addr.017 = phi i8* [ %incdec.ptr, %for.inc ], [ %s, %entry ] + switch i32 %state.018, label %for.inc [ + i32 -1, label %sw.bb + i32 0, label %sw.bb7 + i32 1, label %sw.bb20 + i32 2, label %sw.bb33 + ] + +sw.bb: ; preds = %for.body + switch i8 %1, label %for.inc [ + i8 48, label %if.then + i8 49, label %if.then3 + ] + +if.then: ; preds = %sw.bb + %2 = load i32, i32* %zeros, align 4 + %inc = add nsw i32 %2, 1 + store i32 %inc, i32* %zeros, align 4 + br label %for.inc + +if.then3: ; preds = %sw.bb + %3 = load i32, i32* %ones, align 4 + %inc4 = add nsw i32 %3, 1 + store i32 %inc4, i32* %ones, align 4 + br label %for.inc + +sw.bb7: ; preds = %for.body + switch i8 %1, label %for.inc [ + i8 48, label %if.then10 + i8 49, label %if.then15 + ] + +if.then10: ; preds = %sw.bb7 + %4 = load i32, i32* %zeros, align 4 + %inc11 = add nsw i32 %4, 1 + store i32 %inc11, i32* %zeros, align 4 + br label %for.inc + +if.then15: ; preds = %sw.bb7 + %5 = load i32, i32* %ones, align 4 + %inc16 = add nsw i32 %5, 1 + store i32 %inc16, i32* %ones, align 4 + br label %for.inc + +sw.bb20: ; preds = %for.body + switch i8 %1, label %for.inc [ + i8 48, label %if.then23 + i8 49, label %if.then28 + ] + +if.then23: ; preds = %sw.bb20 + %6 = load i32, i32* %zeros, align 4 + %inc24 = add nsw i32 %6, 1 + store i32 %inc24, i32* %zeros, align 4 + br label %for.inc + +if.then28: ; preds = %sw.bb20 + %7 = load i32, i32* %ones, align 4 + %inc29 = add nsw i32 %7, 1 + store i32 %inc29, i32* %ones, align 4 + br label %for.inc + +sw.bb33: ; preds = %for.body + store i32 -1, i32* %zeros, align 4 + store i32 -1, i32* %ones, align 4 + br label %for.end + +for.inc: ; preds = %sw.bb20, %sw.bb7, %sw.bb, %for.body, %if.then3, %if.then, %if.then15, %if.then10, %if.then28, %if.then23 + %state.1 = phi i32 [ %state.018, %for.body ], [ 0, %if.then23 ], [ 1, %if.then28 ], [ 0, %if.then10 ], [ 1, %if.then15 ], [ 0, %if.then ], [ 1, %if.then3 ], [ 2, %sw.bb ], [ 2, %sw.bb7 ], [ 2, %sw.bb20 ] + %incdec.ptr = getelementptr inbounds i8, i8* %s.addr.017, i64 1 + %8 = load i8, i8* %incdec.ptr, align 1 + %tobool = icmp eq i8 %8, 0 + br i1 %tobool, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry, %sw.bb33 + ret void +; CHECK: for.inc.c1: +; CHECK: for.inc.c2: +; CHECK: for.inc.c3: +; CHECK: for.inc.c9: + +} +