diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll --- a/clang/test/CodeGen/thinlto-distributed-newpm.ll +++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll @@ -165,7 +165,6 @@ ; CHECK-O: Running pass: InstSimplifyPass on main ; CHECK-O: Running pass: DivRemPairsPass on main ; CHECK-O: Running pass: SimplifyCFGPass on main -; CHECK-O: Running pass: SpeculateAroundPHIsPass on main ; CHECK-O: Running pass: CGProfilePass ; CHECK-O: Running pass: GlobalDCEPass ; CHECK-O: Running pass: ConstantMergePass diff --git a/llvm/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h b/llvm/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h deleted file mode 100644 --- a/llvm/include/llvm/Transforms/Scalar/SpeculateAroundPHIs.h +++ /dev/null @@ -1,109 +0,0 @@ -//===- SpeculateAroundPHIs.h - Speculate around PHIs ------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_SCALAR_SPECULATEAROUNDPHIS_H -#define LLVM_TRANSFORMS_SCALAR_SPECULATEAROUNDPHIS_H - -#include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/PassManager.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { - -/// This pass handles simple speculating of instructions around PHIs when -/// doing so is profitable for a particular target despite duplicated -/// instructions. -/// -/// The motivating example are PHIs of constants which will require -/// materializing the constants along each edge. If the PHI is used by an -/// instruction where the target can materialize the constant as part of the -/// instruction, it is profitable to speculate those instructions around the -/// PHI node. This can reduce dynamic instruction count as well as decrease -/// register pressure. -/// -/// Consider this IR for example: -/// ``` -/// entry: -/// br i1 %flag, label %a, label %b -/// -/// a: -/// br label %exit -/// -/// b: -/// br label %exit -/// -/// exit: -/// %p = phi i32 [ 7, %a ], [ 11, %b ] -/// %sum = add i32 %arg, %p -/// ret i32 %sum -/// ``` -/// To materialize the inputs to this PHI node may require an explicit -/// instruction. For example, on x86 this would turn into something like -/// ``` -/// testq %eax, %eax -/// movl $7, %rNN -/// jne .L -/// movl $11, %rNN -/// .L: -/// addl %edi, %rNN -/// movl %rNN, %eax -/// retq -/// ``` -/// When these constants can be folded directly into another instruction, it -/// would be preferable to avoid the potential for register pressure (above we -/// can easily avoid it, but that isn't always true) and simply duplicate the -/// instruction using the PHI: -/// ``` -/// entry: -/// br i1 %flag, label %a, label %b -/// -/// a: -/// %sum.1 = add i32 %arg, 7 -/// br label %exit -/// -/// b: -/// %sum.2 = add i32 %arg, 11 -/// br label %exit -/// -/// exit: -/// %p = phi i32 [ %sum.1, %a ], [ %sum.2, %b ] -/// ret i32 %p -/// ``` -/// Which will generate something like the following on x86: -/// ``` -/// testq %eax, %eax -/// addl $7, %edi -/// jne .L -/// addl $11, %edi -/// .L: -/// movl %edi, %eax -/// retq -/// ``` -/// -/// It is important to note that this pass is never intended to handle more -/// complex cases where speculating around PHIs allows simplifications of the -/// IR itself or other subsequent optimizations. Those can and should already -/// be handled before this pass is ever run by a more powerful analysis that -/// can reason about equivalences and common subexpressions. Classically, those -/// cases would be handled by a GVN-powered PRE or similar transform. This -/// pass, in contrast, is *only* interested in cases where despite no -/// simplifications to the IR itself, speculation is *faster* to execute. The -/// result of this is that the cost models which are appropriate to consider -/// here are relatively simple ones around execution and codesize cost, without -/// any need to consider simplifications or other transformations. -struct SpeculateAroundPHIsPass : PassInfoMixin { - /// Run the pass over the function. - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - -} // end namespace llvm - -#endif // LLVM_TRANSFORMS_SCALAR_SPECULATEAROUNDPHIS_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -203,7 +203,6 @@ #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/Sink.h" -#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/Transforms/Scalar/StructurizeCFG.h" @@ -1448,11 +1447,6 @@ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. OptimizePM.addPass(SimplifyCFGPass()); - // Optimize PHIs by speculating around them when profitable. Note that this - // pass needs to be run after any PRE or similar pass as it is essentially - // inserting redundancies into the program. This even includes SimplifyCFG. - OptimizePM.addPass(SpeculateAroundPHIsPass()); - if (PTO.Coroutines) OptimizePM.addPass(CoroCleanupPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -306,7 +306,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) -FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass()) FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -73,7 +73,6 @@ SimplifyCFGPass.cpp Sink.cpp SpeculativeExecution.cpp - SpeculateAroundPHIs.cpp StraightLineStrengthReduce.cpp StructurizeCFG.cpp TailRecursionElimination.cpp diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp deleted file mode 100644 --- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ /dev/null @@ -1,832 +0,0 @@ -//===- SpeculateAroundPHIs.cpp --------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/Sequence.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" - -using namespace llvm; - -#define DEBUG_TYPE "spec-phis" - -STATISTIC(NumPHIsSpeculated, "Number of PHI nodes we speculated around"); -STATISTIC(NumEdgesSplit, - "Number of critical edges which were split for speculation"); -STATISTIC(NumSpeculatedInstructions, - "Number of instructions we speculated around the PHI nodes"); -STATISTIC(NumNewRedundantInstructions, - "Number of new, redundant instructions inserted"); - -/// Check whether speculating the users of a PHI node around the PHI -/// will be safe. -/// -/// This checks both that all of the users are safe and also that all of their -/// operands are either recursively safe or already available along an incoming -/// edge to the PHI. -/// -/// This routine caches both all the safe nodes explored in `PotentialSpecSet` -/// and the chain of nodes that definitively reach any unsafe node in -/// `UnsafeSet`. By preserving these between repeated calls to this routine for -/// PHIs in the same basic block, the exploration here can be reused. However, -/// these caches must no be reused for PHIs in a different basic block as they -/// reflect what is available along incoming edges. -static bool -isSafeToSpeculatePHIUsers(PHINode &PN, DominatorTree &DT, - SmallPtrSetImpl &PotentialSpecSet, - SmallPtrSetImpl &UnsafeSet) { - auto *PhiBB = PN.getParent(); - SmallPtrSet Visited; - SmallVector, 16> DFSStack; - - // Walk each user of the PHI node. - for (Use &U : PN.uses()) { - auto *UI = cast(U.getUser()); - - // Ensure the use post-dominates the PHI node. This ensures that, in the - // absence of unwinding, the use will actually be reached. - // FIXME: We use a blunt hammer of requiring them to be in the same basic - // block. We should consider using actual post-dominance here in the - // future. - if (UI->getParent() != PhiBB) { - LLVM_DEBUG(dbgs() << " Unsafe: use in a different BB: " << *UI << "\n"); - return false; - } - - if (const auto *CS = dyn_cast(UI)) { - if (CS->isConvergent() || CS->cannotDuplicate()) { - LLVM_DEBUG(dbgs() << " Unsafe: convergent " - "callsite cannot de duplicated: " << *UI << '\n'); - return false; - } - } - - // FIXME: This check is much too conservative. We're not going to move these - // instructions onto new dynamic paths through the program unless there is - // a call instruction between the use and the PHI node. And memory isn't - // changing unless there is a store in that same sequence. We should - // probably change this to do at least a limited scan of the intervening - // instructions and allow handling stores in easily proven safe cases. - if (mayBeMemoryDependent(*UI)) { - LLVM_DEBUG(dbgs() << " Unsafe: can't speculate use: " << *UI << "\n"); - return false; - } - - // Now do a depth-first search of everything these users depend on to make - // sure they are transitively safe. This is a depth-first search, but we - // check nodes in preorder to minimize the amount of checking. - Visited.insert(UI); - DFSStack.push_back({UI, UI->value_op_begin()}); - do { - User::value_op_iterator OpIt; - std::tie(UI, OpIt) = DFSStack.pop_back_val(); - - while (OpIt != UI->value_op_end()) { - auto *OpI = dyn_cast(*OpIt); - // Increment to the next operand for whenever we continue. - ++OpIt; - // No need to visit non-instructions, which can't form dependencies. - if (!OpI) - continue; - - // Now do the main pre-order checks that this operand is a viable - // dependency of something we want to speculate. - - // First do a few checks for instructions that won't require - // speculation at all because they are trivially available on the - // incoming edge (either through dominance or through an incoming value - // to a PHI). - // - // The cases in the current block will be trivially dominated by the - // edge. - auto *ParentBB = OpI->getParent(); - if (ParentBB == PhiBB) { - if (isa(OpI)) { - // We can trivially map through phi nodes in the same block. - continue; - } - } else if (DT.dominates(ParentBB, PhiBB)) { - // Instructions from dominating blocks are already available. - continue; - } - - // Once we know that we're considering speculating the operand, check - // if we've already explored this subgraph and found it to be safe. - if (PotentialSpecSet.count(OpI)) - continue; - - // If we've already explored this subgraph and found it unsafe, bail. - // If when we directly test whether this is safe it fails, bail. - if (UnsafeSet.count(OpI) || ParentBB != PhiBB || - mayBeMemoryDependent(*OpI)) { - LLVM_DEBUG(dbgs() << " Unsafe: can't speculate transitive use: " - << *OpI << "\n"); - // Record the stack of instructions which reach this node as unsafe - // so we prune subsequent searches. - UnsafeSet.insert(OpI); - for (auto &StackPair : DFSStack) { - Instruction *I = StackPair.first; - UnsafeSet.insert(I); - } - return false; - } - - // Skip any operands we're already recursively checking. - if (!Visited.insert(OpI).second) - continue; - - // Push onto the stack and descend. We can directly continue this - // loop when ascending. - DFSStack.push_back({UI, OpIt}); - UI = OpI; - OpIt = OpI->value_op_begin(); - } - - // This node and all its operands are safe. Go ahead and cache that for - // reuse later. - PotentialSpecSet.insert(UI); - - // Continue with the next node on the stack. - } while (!DFSStack.empty()); - } - -#ifndef NDEBUG - // Every visited operand should have been marked as safe for speculation at - // this point. Verify this and return success. - for (auto *I : Visited) - assert(PotentialSpecSet.count(I) && - "Failed to mark a visited instruction as safe!"); -#endif - return true; -} - -/// Check whether, in isolation, a given PHI node is both safe and profitable -/// to speculate users around. -/// -/// This handles checking whether there are any constant operands to a PHI -/// which could represent a useful speculation candidate, whether the users of -/// the PHI are safe to speculate including all their transitive dependencies, -/// and whether after speculation there will be some cost savings (profit) to -/// folding the operands into the users of the PHI node. Returns true if both -/// safe and profitable with relevant cost savings updated in the map and with -/// an update to the `PotentialSpecSet`. Returns false if either safety or -/// profitability are absent. Some new entries may be made to the -/// `PotentialSpecSet` even when this routine returns false, but they remain -/// conservatively correct. -/// -/// The profitability check here is a local one, but it checks this in an -/// interesting way. Beyond checking that the total cost of materializing the -/// constants will be less than the cost of folding them into their users, it -/// also checks that no one incoming constant will have a higher cost when -/// folded into its users rather than materialized. This higher cost could -/// result in a dynamic *path* that is more expensive even when the total cost -/// is lower. Currently, all of the interesting cases where this optimization -/// should fire are ones where it is a no-loss operation in this sense. If we -/// ever want to be more aggressive here, we would need to balance the -/// different incoming edges' cost by looking at their respective -/// probabilities. -static bool isSafeAndProfitableToSpeculateAroundPHI( - PHINode &PN, SmallDenseMap &CostSavingsMap, - SmallPtrSetImpl &PotentialSpecSet, - SmallPtrSetImpl &UnsafeSet, DominatorTree &DT, - TargetTransformInfo &TTI) { - // First see whether there is any cost savings to speculating around this - // PHI, and build up a map of the constant inputs to how many times they - // occur. - bool NonFreeMat = false; - struct CostsAndCount { - InstructionCost MatCost = TargetTransformInfo::TCC_Free; - InstructionCost FoldedCost = TargetTransformInfo::TCC_Free; - int Count = 0; - }; - SmallDenseMap CostsAndCounts; - SmallPtrSet IncomingConstantBlocks; - for (int i : llvm::seq(0, PN.getNumIncomingValues())) { - auto *IncomingC = dyn_cast(PN.getIncomingValue(i)); - if (!IncomingC) - continue; - - // Only visit each incoming edge with a constant input once. - if (!IncomingConstantBlocks.insert(PN.getIncomingBlock(i)).second) - continue; - - auto InsertResult = CostsAndCounts.insert({IncomingC, {}}); - // Count how many edges share a given incoming costant. - ++InsertResult.first->second.Count; - // Only compute the cost the first time we see a particular constant. - if (!InsertResult.second) - continue; - - InstructionCost &MatCost = InsertResult.first->second.MatCost; - MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(), - TargetTransformInfo::TCK_SizeAndLatency); - NonFreeMat |= MatCost != TTI.TCC_Free; - } - if (!NonFreeMat) { - LLVM_DEBUG(dbgs() << " Free: " << PN << "\n"); - // No profit in free materialization. - return false; - } - - // Now check that the uses of this PHI can actually be speculated, - // otherwise we'll still have to materialize the PHI value. - if (!isSafeToSpeculatePHIUsers(PN, DT, PotentialSpecSet, UnsafeSet)) { - LLVM_DEBUG(dbgs() << " Unsafe PHI: " << PN << "\n"); - return false; - } - - // Compute how much (if any) savings are available by speculating around this - // PHI. - for (Use &U : PN.uses()) { - auto *UserI = cast(U.getUser()); - // Now check whether there is any savings to folding the incoming constants - // into this use. - unsigned Idx = U.getOperandNo(); - - // If we have a binary operator that is commutative, an actual constant - // operand would end up on the RHS, so pretend the use of the PHI is on the - // RHS. - // - // Technically, this is a bit weird if *both* operands are PHIs we're - // speculating. But if that is the case, giving an "optimistic" cost isn't - // a bad thing because after speculation it will constant fold. And - // moreover, such cases should likely have been constant folded already by - // some other pass, so we shouldn't worry about "modeling" them terribly - // accurately here. Similarly, if the other operand is a constant, it still - // seems fine to be "optimistic" in our cost modeling, because when the - // incoming operand from the PHI node is also a constant, we will end up - // constant folding. - if (UserI->isBinaryOp() && UserI->isCommutative() && Idx != 1) - // Assume we will commute the constant to the RHS to be canonical. - Idx = 1; - - // Get the intrinsic ID if this user is an intrinsic. - Intrinsic::ID IID = Intrinsic::not_intrinsic; - if (auto *UserII = dyn_cast(UserI)) - IID = UserII->getIntrinsicID(); - - for (auto &IncomingConstantAndCostsAndCount : CostsAndCounts) { - ConstantInt *IncomingC = IncomingConstantAndCostsAndCount.first; - InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost; - InstructionCost &FoldedCost = - IncomingConstantAndCostsAndCount.second.FoldedCost; - if (IID) - FoldedCost += - TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(), - IncomingC->getType(), - TargetTransformInfo::TCK_SizeAndLatency); - else - FoldedCost += - TTI.getIntImmCostInst(UserI->getOpcode(), Idx, - IncomingC->getValue(), IncomingC->getType(), - TargetTransformInfo::TCK_SizeAndLatency); - - // If we accumulate more folded cost for this incoming constant than - // materialized cost, then we'll regress any edge with this constant so - // just bail. We're only interested in cases where folding the incoming - // constants is at least break-even on all paths. - if (FoldedCost > MatCost) { - LLVM_DEBUG(dbgs() << " Not profitable to fold imm: " << *IncomingC - << "\n" - " Materializing cost: " - << MatCost - << "\n" - " Accumulated folded cost: " - << FoldedCost << "\n"); - return false; - } - } - } - - // Compute the total cost savings afforded by this PHI node. - InstructionCost TotalMatCost = TTI.TCC_Free, TotalFoldedCost = TTI.TCC_Free; - for (auto IncomingConstantAndCostsAndCount : CostsAndCounts) { - InstructionCost MatCost = IncomingConstantAndCostsAndCount.second.MatCost; - InstructionCost FoldedCost = - IncomingConstantAndCostsAndCount.second.FoldedCost; - int Count = IncomingConstantAndCostsAndCount.second.Count; - - TotalMatCost += MatCost * Count; - TotalFoldedCost += FoldedCost * Count; - } - assert(TotalMatCost.isValid() && "Constants must be materializable"); - assert(TotalFoldedCost <= TotalMatCost && "If each constant's folded cost is " - "less that its materialized cost, " - "the sum must be as well."); - LLVM_DEBUG(dbgs() << " Cost savings " << (TotalMatCost - TotalFoldedCost) - << ": " << PN << "\n"); - CostSavingsMap[&PN] = TotalMatCost - TotalFoldedCost; - return true; -} - -/// Simple helper to walk all the users of a list of phis depth first, and call -/// a visit function on each one in post-order. -/// -/// All of the PHIs should be in the same basic block, and this is primarily -/// used to make a single depth-first walk across their collective users -/// without revisiting any subgraphs. Callers should provide a fast, idempotent -/// callable to test whether a node has been visited and the more important -/// callable to actually visit a particular node. -/// -/// Depth-first and postorder here refer to the *operand* graph -- we start -/// from a collection of users of PHI nodes and walk "up" the operands -/// depth-first. -template -static void visitPHIUsersAndDepsInPostOrder(ArrayRef PNs, - IsVisitedT IsVisited, - VisitT Visit) { - SmallVector, 16> DFSStack; - for (auto *PN : PNs) - for (Use &U : PN->uses()) { - auto *UI = cast(U.getUser()); - if (IsVisited(UI)) - // Already visited this user, continue across the roots. - continue; - - // Otherwise, walk the operand graph depth-first and visit each - // dependency in postorder. - DFSStack.push_back({UI, UI->value_op_begin()}); - do { - User::value_op_iterator OpIt; - std::tie(UI, OpIt) = DFSStack.pop_back_val(); - while (OpIt != UI->value_op_end()) { - auto *OpI = dyn_cast(*OpIt); - // Increment to the next operand for whenever we continue. - ++OpIt; - // No need to visit non-instructions, which can't form dependencies, - // or instructions outside of our potential dependency set that we - // were given. Finally, if we've already visited the node, continue - // to the next. - if (!OpI || IsVisited(OpI)) - continue; - - // Push onto the stack and descend. We can directly continue this - // loop when ascending. - DFSStack.push_back({UI, OpIt}); - UI = OpI; - OpIt = OpI->value_op_begin(); - } - - // Finished visiting children, visit this node. - assert(!IsVisited(UI) && "Should not have already visited a node!"); - Visit(UI); - } while (!DFSStack.empty()); - } -} - -/// Find profitable PHIs to speculate. -/// -/// For a PHI node to be profitable, we need the cost of speculating its users -/// (and their dependencies) to not exceed the savings of folding the PHI's -/// constant operands into the speculated users. -/// -/// Computing this is surprisingly challenging. Because users of two different -/// PHI nodes can depend on each other or on common other instructions, it may -/// be profitable to speculate two PHI nodes together even though neither one -/// in isolation is profitable. The straightforward way to find all the -/// profitable PHIs would be to check each combination of PHIs' cost, but this -/// is exponential in complexity. -/// -/// Even if we assume that we only care about cases where we can consider each -/// PHI node in isolation (rather than considering cases where none are -/// profitable in isolation but some subset are profitable as a set), we still -/// have a challenge. The obvious way to find all individually profitable PHIs -/// is to iterate until reaching a fixed point, but this will be quadratic in -/// complexity. =/ -/// -/// This code currently uses a linear-to-compute order for a greedy approach. -/// It won't find cases where a set of PHIs must be considered together, but it -/// handles most cases of order dependence without quadratic iteration. The -/// specific order used is the post-order across the operand DAG. When the last -/// user of a PHI is visited in this postorder walk, we check it for -/// profitability. -/// -/// There is an orthogonal extra complexity to all of this: computing the cost -/// itself can easily become a linear computation making everything again (at -/// best) quadratic. Using a postorder over the operand graph makes it -/// particularly easy to avoid this through dynamic programming. As we do the -/// postorder walk, we build the transitive cost of that subgraph. It is also -/// straightforward to then update these costs when we mark a PHI for -/// speculation so that subsequent PHIs don't re-pay the cost of already -/// speculated instructions. -static SmallVector findProfitablePHIs( - ArrayRef PNs, - const SmallDenseMap &CostSavingsMap, - const SmallPtrSetImpl &PotentialSpecSet, int NumPreds, - DominatorTree &DT, TargetTransformInfo &TTI) { - SmallVector SpecPNs; - - // First, establish a reverse mapping from immediate users of the PHI nodes - // to the nodes themselves, and count how many users each PHI node has in - // a way we can update while processing them. - SmallDenseMap, 16> UserToPNMap; - SmallDenseMap PNUserCountMap; - SmallPtrSet UserSet; - for (auto *PN : PNs) { - assert(UserSet.empty() && "Must start with an empty user set!"); - for (Use &U : PN->uses()) - UserSet.insert(cast(U.getUser())); - PNUserCountMap[PN] = UserSet.size(); - for (auto *UI : UserSet) - UserToPNMap.insert({UI, {}}).first->second.push_back(PN); - UserSet.clear(); - } - - // Now do a DFS across the operand graph of the users, computing cost as we - // go and when all costs for a given PHI are known, checking that PHI for - // profitability. - SmallDenseMap SpecCostMap; - visitPHIUsersAndDepsInPostOrder( - PNs, - /*IsVisited*/ - [&](Instruction *I) { - // We consider anything that isn't potentially speculated to be - // "visited" as it is already handled. Similarly, anything that *is* - // potentially speculated but for which we have an entry in our cost - // map, we're done. - return !PotentialSpecSet.count(I) || SpecCostMap.count(I); - }, - /*Visit*/ - [&](Instruction *I) { - // We've fully visited the operands, so sum their cost with this node - // and update the cost map. - InstructionCost Cost = TTI.TCC_Free; - for (Value *OpV : I->operand_values()) - if (auto *OpI = dyn_cast(OpV)) { - auto CostMapIt = SpecCostMap.find(OpI); - if (CostMapIt != SpecCostMap.end()) - Cost += CostMapIt->second; - } - Cost += TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency); - bool Inserted = SpecCostMap.insert({I, Cost}).second; - (void)Inserted; - assert(Inserted && "Must not re-insert a cost during the DFS!"); - - // Now check if this node had a corresponding PHI node using it. If so, - // we need to decrement the outstanding user count for it. - auto UserPNsIt = UserToPNMap.find(I); - if (UserPNsIt == UserToPNMap.end()) - return; - auto &UserPNs = UserPNsIt->second; - auto UserPNsSplitIt = std::stable_partition( - UserPNs.begin(), UserPNs.end(), [&](PHINode *UserPN) { - int &PNUserCount = PNUserCountMap.find(UserPN)->second; - assert( - PNUserCount > 0 && - "Should never re-visit a PN after its user count hits zero!"); - --PNUserCount; - return PNUserCount != 0; - }); - - // FIXME: Rather than one at a time, we should sum the savings as the - // cost will be completely shared. - SmallVector SpecWorklist; - for (auto *PN : llvm::make_range(UserPNsSplitIt, UserPNs.end())) { - InstructionCost SpecCost = TTI.TCC_Free; - for (Use &U : PN->uses()) - SpecCost += - SpecCostMap.find(cast(U.getUser()))->second; - SpecCost *= (NumPreds - 1); - // When the user count of a PHI node hits zero, we should check its - // profitability. If profitable, we should mark it for speculation - // and zero out the cost of everything it depends on. - InstructionCost CostSavings = CostSavingsMap.find(PN)->second; - if (SpecCost > CostSavings) { - LLVM_DEBUG(dbgs() << " Not profitable, speculation cost: " << *PN - << "\n" - " Cost savings: " - << CostSavings - << "\n" - " Speculation cost: " - << SpecCost << "\n"); - continue; - } - - // We're going to speculate this user-associated PHI. Copy it out and - // add its users to the worklist to update their cost. - SpecPNs.push_back(PN); - for (Use &U : PN->uses()) { - auto *UI = cast(U.getUser()); - auto CostMapIt = SpecCostMap.find(UI); - if (CostMapIt->second == 0) - continue; - // Zero out this cost entry to avoid duplicates. - CostMapIt->second = 0; - SpecWorklist.push_back(UI); - } - } - - // Now walk all the operands of the users in the worklist transitively - // to zero out all the memoized costs. - while (!SpecWorklist.empty()) { - Instruction *SpecI = SpecWorklist.pop_back_val(); - assert(SpecCostMap.find(SpecI)->second == 0 && - "Didn't zero out a cost!"); - - // Walk the operands recursively to zero out their cost as well. - for (auto *OpV : SpecI->operand_values()) { - auto *OpI = dyn_cast(OpV); - if (!OpI) - continue; - auto CostMapIt = SpecCostMap.find(OpI); - if (CostMapIt == SpecCostMap.end() || CostMapIt->second == 0) - continue; - CostMapIt->second = 0; - SpecWorklist.push_back(OpI); - } - } - }); - - return SpecPNs; -} - -/// Speculate users around a set of PHI nodes. -/// -/// This routine does the actual speculation around a set of PHI nodes where we -/// have determined this to be both safe and profitable. -/// -/// This routine handles any spliting of critical edges necessary to create -/// a safe block to speculate into as well as cloning the instructions and -/// rewriting all uses. -static void speculatePHIs(ArrayRef SpecPNs, - SmallPtrSetImpl &PotentialSpecSet, - SmallSetVector &PredSet, - DominatorTree &DT) { - LLVM_DEBUG(dbgs() << " Speculating around " << SpecPNs.size() << " PHIs!\n"); - NumPHIsSpeculated += SpecPNs.size(); - - // Split any critical edges so that we have a block to hoist into. - auto *ParentBB = SpecPNs[0]->getParent(); - SmallVector SpecPreds; - SpecPreds.reserve(PredSet.size()); - for (auto *PredBB : PredSet) { - auto *NewPredBB = SplitCriticalEdge( - PredBB, ParentBB, - CriticalEdgeSplittingOptions(&DT).setMergeIdenticalEdges()); - if (NewPredBB) { - ++NumEdgesSplit; - LLVM_DEBUG(dbgs() << " Split critical edge from: " << PredBB->getName() - << "\n"); - SpecPreds.push_back(NewPredBB); - } else { - assert(PredBB->getSingleSuccessor() == ParentBB && - "We need a non-critical predecessor to speculate into."); - assert(!isa(PredBB->getTerminator()) && - "Cannot have a non-critical invoke!"); - - // Already non-critical, use existing pred. - SpecPreds.push_back(PredBB); - } - } - - SmallPtrSet SpecSet; - SmallVector SpecList; - visitPHIUsersAndDepsInPostOrder(SpecPNs, - /*IsVisited*/ - [&](Instruction *I) { - // This is visited if we don't need to - // speculate it or we already have - // speculated it. - return !PotentialSpecSet.count(I) || - SpecSet.count(I); - }, - /*Visit*/ - [&](Instruction *I) { - // All operands scheduled, schedule this - // node. - SpecSet.insert(I); - SpecList.push_back(I); - }); - - int NumSpecInsts = SpecList.size() * SpecPreds.size(); - int NumRedundantInsts = NumSpecInsts - SpecList.size(); - LLVM_DEBUG(dbgs() << " Inserting " << NumSpecInsts - << " speculated instructions, " << NumRedundantInsts - << " redundancies\n"); - NumSpeculatedInstructions += NumSpecInsts; - NumNewRedundantInstructions += NumRedundantInsts; - - // Each predecessor is numbered by its index in `SpecPreds`, so for each - // instruction we speculate, the speculated instruction is stored in that - // index of the vector associated with the original instruction. We also - // store the incoming values for each predecessor from any PHIs used. - SmallDenseMap, 16> SpeculatedValueMap; - - // Inject the synthetic mappings to rewrite PHIs to the appropriate incoming - // value. This handles both the PHIs we are speculating around and any other - // PHIs that happen to be used. - for (auto *OrigI : SpecList) - for (auto *OpV : OrigI->operand_values()) { - auto *OpPN = dyn_cast(OpV); - if (!OpPN || OpPN->getParent() != ParentBB) - continue; - - auto InsertResult = SpeculatedValueMap.insert({OpPN, {}}); - if (!InsertResult.second) - continue; - - auto &SpeculatedVals = InsertResult.first->second; - - // Populating our structure for mapping is particularly annoying because - // finding an incoming value for a particular predecessor block in a PHI - // node is a linear time operation! To avoid quadratic behavior, we build - // a map for this PHI node's incoming values and then translate it into - // the more compact representation used below. - SmallDenseMap IncomingValueMap; - for (int i : llvm::seq(0, OpPN->getNumIncomingValues())) - IncomingValueMap[OpPN->getIncomingBlock(i)] = OpPN->getIncomingValue(i); - - for (auto *PredBB : SpecPreds) - SpeculatedVals.push_back(IncomingValueMap.find(PredBB)->second); - } - - // Speculate into each predecessor. - for (int PredIdx : llvm::seq(0, SpecPreds.size())) { - auto *PredBB = SpecPreds[PredIdx]; - assert(PredBB->getSingleSuccessor() == ParentBB && - "We need a non-critical predecessor to speculate into."); - - for (auto *OrigI : SpecList) { - auto *NewI = OrigI->clone(); - NewI->setName(Twine(OrigI->getName()) + "." + Twine(PredIdx)); - NewI->insertBefore(PredBB->getTerminator()); - - // Rewrite all the operands to the previously speculated instructions. - // Because we're walking in-order, the defs must precede the uses and we - // should already have these mappings. - for (Use &U : NewI->operands()) { - auto *OpI = dyn_cast(U.get()); - if (!OpI) - continue; - auto MapIt = SpeculatedValueMap.find(OpI); - if (MapIt == SpeculatedValueMap.end()) - continue; - const auto &SpeculatedVals = MapIt->second; - assert(SpeculatedVals[PredIdx] && - "Must have a speculated value for this predecessor!"); - assert(SpeculatedVals[PredIdx]->getType() == OpI->getType() && - "Speculated value has the wrong type!"); - - // Rewrite the use to this predecessor's speculated instruction. - U.set(SpeculatedVals[PredIdx]); - } - - // Commute instructions which now have a constant in the LHS but not the - // RHS. - if (NewI->isBinaryOp() && NewI->isCommutative() && - isa(NewI->getOperand(0)) && - !isa(NewI->getOperand(1))) - NewI->getOperandUse(0).swap(NewI->getOperandUse(1)); - - SpeculatedValueMap[OrigI].push_back(NewI); - assert(SpeculatedValueMap[OrigI][PredIdx] == NewI && - "Mismatched speculated instruction index!"); - } - } - - // Walk the speculated instruction list and if they have uses, insert a PHI - // for them from the speculated versions, and replace the uses with the PHI. - // Then erase the instructions as they have been fully speculated. The walk - // needs to be in reverse so that we don't think there are users when we'll - // actually eventually remove them later. - IRBuilder<> IRB(SpecPNs[0]); - for (auto *OrigI : llvm::reverse(SpecList)) { - // Check if we need a PHI for any remaining users and if so, insert it. - if (!OrigI->use_empty()) { - auto *SpecIPN = IRB.CreatePHI(OrigI->getType(), SpecPreds.size(), - Twine(OrigI->getName()) + ".phi"); - // Add the incoming values we speculated. - auto &SpeculatedVals = SpeculatedValueMap.find(OrigI)->second; - for (int PredIdx : llvm::seq(0, SpecPreds.size())) - SpecIPN->addIncoming(SpeculatedVals[PredIdx], SpecPreds[PredIdx]); - - // And replace the uses with the PHI node. - OrigI->replaceAllUsesWith(SpecIPN); - } - - // It is important to immediately erase this so that it stops using other - // instructions. This avoids inserting needless PHIs of them. - OrigI->eraseFromParent(); - } - - // All of the uses of the speculated phi nodes should be removed at this - // point, so erase them. - for (auto *SpecPN : SpecPNs) { - assert(SpecPN->use_empty() && "All users should have been speculated!"); - SpecPN->eraseFromParent(); - } -} - -/// Try to speculate around a series of PHIs from a single basic block. -/// -/// This routine checks whether any of these PHIs are profitable to speculate -/// users around. If safe and profitable, it does the speculation. It returns -/// true when at least some speculation occurs. -static bool tryToSpeculatePHIs(SmallVectorImpl &PNs, - DominatorTree &DT, TargetTransformInfo &TTI) { - LLVM_DEBUG(dbgs() << "Evaluating phi nodes for speculation:\n"); - - // Savings in cost from speculating around a PHI node. - SmallDenseMap CostSavingsMap; - - // Remember the set of instructions that are candidates for speculation so - // that we can quickly walk things within that space. This prunes out - // instructions already available along edges, etc. - SmallPtrSet PotentialSpecSet; - - // Remember the set of instructions that are (transitively) unsafe to - // speculate into the incoming edges of this basic block. This avoids - // recomputing them for each PHI node we check. This set is specific to this - // block though as things are pruned out of it based on what is available - // along incoming edges. - SmallPtrSet UnsafeSet; - - // For each PHI node in this block, check whether there are immediate folding - // opportunities from speculation, and whether that speculation will be - // valid. This determise the set of safe PHIs to speculate. - llvm::erase_if(PNs, [&](PHINode *PN) { - return !isSafeAndProfitableToSpeculateAroundPHI( - *PN, CostSavingsMap, PotentialSpecSet, UnsafeSet, DT, TTI); - }); - // If no PHIs were profitable, skip. - if (PNs.empty()) { - LLVM_DEBUG(dbgs() << " No safe and profitable PHIs found!\n"); - return false; - } - - // We need to know how much speculation will cost which is determined by how - // many incoming edges will need a copy of each speculated instruction. - SmallSetVector PredSet; - for (auto *PredBB : PNs[0]->blocks()) { - if (!PredSet.insert(PredBB)) - continue; - - // We cannot speculate when a predecessor is an indirect branch. - // FIXME: We also can't reliably create a non-critical edge block for - // speculation if the predecessor is an invoke. This doesn't seem - // fundamental and we should probably be splitting critical edges - // differently. - const auto *TermInst = PredBB->getTerminator(); - if (isa(TermInst) || - isa(TermInst) || - isa(TermInst)) { - LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: " - << PredBB->getName() << "\n"); - return false; - } - } - if (PredSet.size() < 2) { - LLVM_DEBUG(dbgs() << " Unimportant: phi with only one predecessor\n"); - return false; - } - - SmallVector SpecPNs = findProfitablePHIs( - PNs, CostSavingsMap, PotentialSpecSet, PredSet.size(), DT, TTI); - if (SpecPNs.empty()) - // Nothing to do. - return false; - - speculatePHIs(SpecPNs, PotentialSpecSet, PredSet, DT); - return true; -} - -PreservedAnalyses SpeculateAroundPHIsPass::run(Function &F, - FunctionAnalysisManager &AM) { - auto &DT = AM.getResult(F); - auto &TTI = AM.getResult(F); - - bool Changed = false; - for (auto *BB : ReversePostOrderTraversal(&F)) { - SmallVector PNs; - auto BBI = BB->begin(); - while (auto *PN = dyn_cast(&*BBI)) { - PNs.push_back(PN); - ++BBI; - } - - if (PNs.empty()) - continue; - - Changed |= tryToSpeculatePHIs(PNs, DT, TTI); - } - - if (!Changed) - return PreservedAnalyses::all(); - - PreservedAnalyses PA; - return PA; -} diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -237,7 +237,6 @@ ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-EP-OPTIMIZER-LAST: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-defaults.ll @@ -218,7 +218,6 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-POSTLINK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-POSTLINK-O-NEXT: Running pass: CGProfilePass ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -187,7 +187,6 @@ ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -199,7 +199,6 @@ ; CHECK-O-NEXT: Running pass: InstSimplifyPass ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass ; CHECK-O-NEXT: Running pass: CGProfilePass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll @@ -97,12 +97,9 @@ ; CHECK-GENERIC-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef ; CHECK-GENERIC-NEXT: [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0 -; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[ENTRY_FOR_BODY6_CRIT_EDGE:%.*]] -; CHECK-GENERIC: entry.for.body6_crit_edge: -; CHECK-GENERIC-NEXT: [[INC_1:%.*]] = add nuw i32 0, 1 -; CHECK-GENERIC-NEXT: br label [[FOR_BODY6:%.*]] +; CHECK-GENERIC-NEXT: br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]] ; CHECK-GENERIC: for.body6: -; CHECK-GENERIC-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY6_FOR_BODY6_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[ENTRY_FOR_BODY6_CRIT_EDGE]] ] +; CHECK-GENERIC-NEXT: [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-GENERIC-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 ; CHECK-GENERIC-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 ; CHECK-GENERIC-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2 @@ -111,11 +108,9 @@ ; CHECK-GENERIC-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 ; CHECK-GENERIC-NEXT: [[ADD21:%.*]] = add nsw i32 [[MUL16]], [[TMP2]] ; CHECK-GENERIC-NEXT: store i32 [[ADD21]], i32* [[ARRAYIDX20]], align 4 -; CHECK-GENERIC-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC_PHI]], [[ARG_0]] -; CHECK-GENERIC-NEXT: br i1 [[CMP5]], label [[FOR_BODY6_FOR_BODY6_CRIT_EDGE]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK-GENERIC: for.body6.for.body6_crit_edge: -; CHECK-GENERIC-NEXT: [[INC_0]] = add nuw i32 [[INC_PHI]], 1 -; CHECK-GENERIC-NEXT: br label [[FOR_BODY6]] +; CHECK-GENERIC-NEXT: [[INC]] = add nuw i32 [[K_03]], 1 +; CHECK-GENERIC-NEXT: [[CMP5:%.*]] = icmp ult i32 [[INC]], [[ARG_0]] +; CHECK-GENERIC-NEXT: br i1 [[CMP5]], label [[FOR_BODY6]], label [[FOR_END]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-GENERIC: for.end: ; CHECK-GENERIC-NEXT: ret void ; diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -156,36 +156,27 @@ ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[DOT0:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 0 -; CHECK-NEXT: [[DOT017:%.*]] = getelementptr inbounds float, float* [[A]], i64 0 -; CHECK-NEXT: [[DOT018:%.*]] = getelementptr inbounds float, float* [[B]], i64 0 -; CHECK-NEXT: [[INDEX_NEXT_0:%.*]] = add nuw i64 0, 4 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX_NEXT_PHI:%.*]] = phi i64 [ [[INDEX_NEXT_0]], [[VECTOR_PH]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE:%.*]] ] -; CHECK-NEXT: [[DOTPHI:%.*]] = phi float* [ [[DOT018]], [[VECTOR_PH]] ], [ [[DOT120:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ] -; CHECK-NEXT: [[DOTPHI21:%.*]] = phi float* [ [[DOT017]], [[VECTOR_PH]] ], [ [[DOT119:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ] -; CHECK-NEXT: [[DOTPHI22:%.*]] = phi i32* [ [[DOT0]], [[VECTOR_PH]] ], [ [[DOT1:%.*]], [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]] ] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTPHI22]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !8 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[DOTPHI21]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4, !alias.scope !11 -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[DOTPHI]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !13, !noalias !15 -; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP5]], [[WIDE_LOAD15]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP5]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast float* [[DOTPHI]] to <4 x float>* -; CHECK-NEXT: store <4 x float> [[PREDPHI]], <4 x float>* [[TMP8]], align 4, !alias.scope !13, !noalias !15 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT_PHI]], 10000 -; CHECK-NEXT: br i1 [[TMP9]], label [[EXIT:%.*]], label [[VECTOR_BODY_VECTOR_BODY_CRIT_EDGE]], !llvm.loop [[LOOP16:![0-9]+]] -; CHECK: vector.body.vector.body_crit_edge: -; CHECK-NEXT: [[DOT1]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX_NEXT_PHI]] -; CHECK-NEXT: [[DOT119]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX_NEXT_PHI]] -; CHECK-NEXT: [[DOT120]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX_NEXT_PHI]] -; CHECK-NEXT: [[INDEX_NEXT_1]] = add nuw i64 [[INDEX_NEXT_PHI]], 4 -; CHECK-NEXT: br label [[VECTOR_BODY]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !8 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !11 +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[WIDE_LOAD14]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !13, !noalias !15 +; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD15]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP7]], <4 x float> [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP8]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[PREDPHI]], <4 x float>* [[TMP11]], align 4, !alias.scope !13, !noalias !15 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP12]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: loop.body: ; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[C_GEP:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[IV1]] diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -104,21 +104,18 @@ ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.body.preheader: ; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATED_LATER_NEWPM: for.cond.cleanup: ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() ; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_NEWPM: for.body: -; ROTATED_LATER_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] +; ROTATED_LATER_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() -; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] -; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; ROTATED_LATER_NEWPM: for.body.for.body_crit_edge: -; ROTATED_LATER_NEWPM-NEXT: [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1 -; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY]] +; ROTATED_LATER_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 +; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATED_LATER_NEWPM: return: ; ROTATED_LATER_NEWPM-NEXT: ret void ; @@ -155,21 +152,18 @@ ; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; ROTATE_NEWPM: for.body.preheader: ; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATE_NEWPM-NEXT: [[INC_1:%.*]] = add nuw nsw i32 0, 1 ; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] ; ROTATE_NEWPM: for.cond.cleanup: ; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f2() ; ROTATE_NEWPM-NEXT: br label [[RETURN]] ; ROTATE_NEWPM: for.body: -; ROTATE_NEWPM-NEXT: [[INC_PHI:%.*]] = phi i32 [ [[INC_0:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ], [ [[INC_1]], [[FOR_BODY_PREHEADER]] ] +; ROTATE_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f1() -; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC_PHI]], [[TMP0]] -; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] -; ROTATE_NEWPM: for.body.for.body_crit_edge: -; ROTATE_NEWPM-NEXT: [[INC_0]] = add nuw nsw i32 [[INC_PHI]], 1 -; ROTATE_NEWPM-NEXT: br label [[FOR_BODY]] +; ROTATE_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 +; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_NEWPM: return: ; ROTATE_NEWPM-NEXT: ret void ; diff --git a/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll b/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll deleted file mode 100644 --- a/llvm/test/Transforms/SpeculateAroundPHIs/basic-x86.ll +++ /dev/null @@ -1,639 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; Test the basic functionality of speculating around PHI nodes based on reduced -; cost of the constant operands to the PHI nodes using the x86 cost model. -; -; REQUIRES: x86-registered-target -; RUN: opt -S -passes=spec-phis < %s | FileCheck %s - -target triple = "x86_64-unknown-unknown" - -define i32 @test_basic(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_basic( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = add i32 %arg, %p - ret i32 %sum -} - -; Check that we handle commuted operands and get the constant onto the RHS. -define i32 @test_commuted(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_commuted( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = add i32 %p, %arg - ret i32 %sum -} - -define i32 @test_split_crit_edge(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_split_crit_edge( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[ENTRY_EXIT_CRIT_EDGE:%.*]], label [[A:%.*]] -; CHECK: entry.exit_crit_edge: -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG:%.*]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[ENTRY_EXIT_CRIT_EDGE]] ], [ [[SUM_1]], [[A]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag, label %exit, label %a - -a: - br label %exit - -exit: - %p = phi i32 [ 7, %entry ], [ 11, %a ] - %sum = add i32 %arg, %p - ret i32 %sum -} - -define i32 @test_no_spec_dominating_inst(i1 %flag, i32* %ptr) { -; CHECK-LABEL: @test_no_spec_dominating_inst( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[PTR:%.*]] -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[LOAD]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[LOAD]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - %load = load i32, i32* %ptr - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = add i32 %load, %p - ret i32 %sum -} - -; We have special logic handling PHI nodes, make sure it doesn't get confused -; by a dominating PHI. -define i32 @test_no_spec_dominating_phi(i1 %flag1, i1 %flag2, i32 %x, i32 %y) { -; CHECK-LABEL: @test_no_spec_dominating_phi( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG1:%.*]], label [[X_BLOCK:%.*]], label [[Y_BLOCK:%.*]] -; CHECK: x.block: -; CHECK-NEXT: br label [[MERGE:%.*]] -; CHECK: y.block: -; CHECK-NEXT: br label [[MERGE]] -; CHECK: merge: -; CHECK-NEXT: [[XY_PHI:%.*]] = phi i32 [ [[X:%.*]], [[X_BLOCK]] ], [ [[Y:%.*]], [[Y_BLOCK]] ] -; CHECK-NEXT: br i1 [[FLAG2:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[XY_PHI]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[XY_PHI]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag1, label %x.block, label %y.block - -x.block: - br label %merge - -y.block: - br label %merge - -merge: - %xy.phi = phi i32 [ %x, %x.block ], [ %y, %y.block ] - br i1 %flag2, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = add i32 %xy.phi, %p - ret i32 %sum -} - -; Ensure that we will speculate some number of "free" instructions on the given -; architecture even though they are unrelated to the PHI itself. -define i32 @test_speculate_free_insts(i1 %flag, i64 %arg) { -; CHECK-LABEL: @test_speculate_free_insts( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[T1_0:%.*]] = trunc i64 [[ARG:%.*]] to i48 -; CHECK-NEXT: [[T2_0:%.*]] = trunc i48 [[T1_0]] to i32 -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[T2_0]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[T1_1:%.*]] = trunc i64 [[ARG]] to i48 -; CHECK-NEXT: [[T2_1:%.*]] = trunc i48 [[T1_1]] to i32 -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[T2_1]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %t1 = trunc i64 %arg to i48 - %t2 = trunc i48 %t1 to i32 - %sum = add i32 %t2, %p - ret i32 %sum -} - -define i32 @test_speculate_free_phis(i1 %flag, i32 %arg1, i32 %arg2) { -; CHECK-LABEL: @test_speculate_free_phis( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_0:%.*]] = add i32 [[ARG1:%.*]], 7 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM_1:%.*]] = add i32 [[ARG2:%.*]], 11 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: [[P2:%.*]] = phi i32 [ [[ARG1]], [[A]] ], [ [[ARG2]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -; We don't DCE the now unused PHI node... -exit: - %p1 = phi i32 [ 7, %a ], [ 11, %b ] - %p2 = phi i32 [ %arg1, %a ], [ %arg2, %b ] - %sum = add i32 %p2, %p1 - ret i32 %sum -} - -; We shouldn't speculate multiple uses even if each individually looks -; profitable because of the total cost. -define i32 @test_no_spec_multi_uses(i1 %flag, i32 %arg1, i32 %arg2, i32 %arg3) { -; CHECK-LABEL: @test_no_spec_multi_uses( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] -; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ARG1:%.*]], [[P]] -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ARG2:%.*]], [[P]] -; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[ARG3:%.*]], [[P]] -; CHECK-NEXT: [[SUM1:%.*]] = add i32 [[ADD1]], [[ADD2]] -; CHECK-NEXT: [[SUM2:%.*]] = add i32 [[SUM1]], [[ADD3]] -; CHECK-NEXT: ret i32 [[SUM2]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %add1 = add i32 %arg1, %p - %add2 = add i32 %arg2, %p - %add3 = add i32 %arg3, %p - %sum1 = add i32 %add1, %add2 - %sum2 = add i32 %sum1, %add3 - ret i32 %sum2 -} - -define i32 @test_multi_phis1(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_multi_phis1( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 1 -; CHECK-NEXT: [[SUM2_0:%.*]] = add i32 [[SUM1_0]], 3 -; CHECK-NEXT: [[SUM3_0:%.*]] = add i32 [[SUM2_0]], 5 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM1_1:%.*]] = add i32 [[ARG]], 2 -; CHECK-NEXT: [[SUM2_1:%.*]] = add i32 [[SUM1_1]], 4 -; CHECK-NEXT: [[SUM3_1:%.*]] = add i32 [[SUM2_1]], 6 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM3_PHI:%.*]] = phi i32 [ [[SUM3_0]], [[A]] ], [ [[SUM3_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM3_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p1 = phi i32 [ 1, %a ], [ 2, %b ] - %p2 = phi i32 [ 3, %a ], [ 4, %b ] - %p3 = phi i32 [ 5, %a ], [ 6, %b ] - %sum1 = add i32 %arg, %p1 - %sum2 = add i32 %sum1, %p2 - %sum3 = add i32 %sum2, %p3 - ret i32 %sum3 -} - -; Check that the order of the PHIs doesn't impact the behavior. -define i32 @test_multi_phis2(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_multi_phis2( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 1 -; CHECK-NEXT: [[SUM2_0:%.*]] = add i32 [[SUM1_0]], 3 -; CHECK-NEXT: [[SUM3_0:%.*]] = add i32 [[SUM2_0]], 5 -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM1_1:%.*]] = add i32 [[ARG]], 2 -; CHECK-NEXT: [[SUM2_1:%.*]] = add i32 [[SUM1_1]], 4 -; CHECK-NEXT: [[SUM3_1:%.*]] = add i32 [[SUM2_1]], 6 -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM3_PHI:%.*]] = phi i32 [ [[SUM3_0]], [[A]] ], [ [[SUM3_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM3_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p3 = phi i32 [ 5, %a ], [ 6, %b ] - %p2 = phi i32 [ 3, %a ], [ 4, %b ] - %p1 = phi i32 [ 1, %a ], [ 2, %b ] - %sum1 = add i32 %arg, %p1 - %sum2 = add i32 %sum1, %p2 - %sum3 = add i32 %sum2, %p3 - ret i32 %sum3 -} - -define i32 @test_no_spec_indirectbr(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_no_spec_indirectbr( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: indirectbr i8* undef, [label %exit] -; CHECK: b: -; CHECK-NEXT: indirectbr i8* undef, [label %exit] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] -; CHECK-NEXT: ret i32 [[SUM]] -; -entry: - br i1 %flag, label %a, label %b - -a: - indirectbr i8* undef, [label %exit] - -b: - indirectbr i8* undef, [label %exit] - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = add i32 %arg, %p - ret i32 %sum -} - -declare void @g() - -declare i32 @__gxx_personality_v0(...) - -; FIXME: We should be able to handle this case -- only the exceptional edge is -; impossible to split. -define i32 @test_no_spec_invoke_continue(i1 %flag, i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: @test_no_spec_invoke_continue( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[LPAD:%.*]] -; CHECK: b: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label [[EXIT]] unwind label [[LPAD]] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] -; CHECK-NEXT: ret i32 [[SUM]] -; CHECK: lpad: -; CHECK-NEXT: [[LP:%.*]] = landingpad { i8*, i32 } -; CHECK-NEXT: cleanup -; CHECK-NEXT: resume { i8*, i32 } undef -; -entry: - br i1 %flag, label %a, label %b - -a: - invoke void @g() - to label %exit unwind label %lpad - -b: - invoke void @g() - to label %exit unwind label %lpad - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = add i32 %arg, %p - ret i32 %sum - -lpad: - %lp = landingpad { i8*, i32 } - cleanup - resume { i8*, i32 } undef -} - -define i32 @test_no_spec_landingpad(i32 %arg, i32* %ptr) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -; CHECK-LABEL: @test_no_spec_landingpad( -; CHECK-NEXT: entry: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] -; CHECK: invoke.cont: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[LPAD]] -; CHECK: lpad: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[ENTRY:%.*]] ], [ 11, [[INVOKE_CONT]] ] -; CHECK-NEXT: [[LP:%.*]] = landingpad { i8*, i32 } -; CHECK-NEXT: cleanup -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] -; CHECK-NEXT: store i32 [[SUM]], i32* [[PTR:%.*]] -; CHECK-NEXT: resume { i8*, i32 } undef -; CHECK: exit: -; CHECK-NEXT: ret i32 0 -; -entry: - invoke void @g() - to label %invoke.cont unwind label %lpad - -invoke.cont: - invoke void @g() - to label %exit unwind label %lpad - -lpad: - %p = phi i32 [ 7, %entry ], [ 11, %invoke.cont ] - %lp = landingpad { i8*, i32 } - cleanup - %sum = add i32 %arg, %p - store i32 %sum, i32* %ptr - resume { i8*, i32 } undef - -exit: - ret i32 0 -} - -declare i32 @__CxxFrameHandler3(...) - -define i32 @test_no_spec_cleanuppad(i32 %arg, i32* %ptr) personality i32 (...)* @__CxxFrameHandler3 { -; CHECK-LABEL: @test_no_spec_cleanuppad( -; CHECK-NEXT: entry: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]] -; CHECK: invoke.cont: -; CHECK-NEXT: invoke void @g() -; CHECK-NEXT: to label [[EXIT:%.*]] unwind label [[LPAD]] -; CHECK: lpad: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[ENTRY:%.*]] ], [ 11, [[INVOKE_CONT]] ] -; CHECK-NEXT: [[CP:%.*]] = cleanuppad within none [] -; CHECK-NEXT: [[SUM:%.*]] = add i32 [[ARG:%.*]], [[P]] -; CHECK-NEXT: store i32 [[SUM]], i32* [[PTR:%.*]] -; CHECK-NEXT: cleanupret from [[CP]] unwind to caller -; CHECK: exit: -; CHECK-NEXT: ret i32 0 -; -entry: - invoke void @g() - to label %invoke.cont unwind label %lpad - -invoke.cont: - invoke void @g() - to label %exit unwind label %lpad - -lpad: - %p = phi i32 [ 7, %entry ], [ 11, %invoke.cont ] - %cp = cleanuppad within none [] - %sum = add i32 %arg, %p - store i32 %sum, i32* %ptr - cleanupret from %cp unwind to caller - -exit: - ret i32 0 -} - -; Check that we don't fall over when confronted with seemingly reasonable code -; for us to handle but in an unreachable region and with non-PHI use-def -; cycles. -define i32 @test_unreachable_non_phi_cycles(i1 %flag, i32 %arg) { -; CHECK-LABEL: @test_unreachable_non_phi_cycles( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret i32 42 -; CHECK: a: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A:%.*]] ], [ 11, [[B:%.*]] ] -; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[SUM:%.*]] to i64 -; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[ZEXT]] to i32 -; CHECK-NEXT: [[SUM]] = add i32 [[TRUNC]], [[P]] -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A]], label [[B]] -; -entry: - ret i32 42 - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %zext = zext i32 %sum to i64 - %trunc = trunc i64 %zext to i32 - %sum = add i32 %trunc, %p - br i1 %flag, label %a, label %b -} - -; Check that we don't speculate in the face of an expensive immediate. There -; are two reasons this should never speculate. First, even a local analysis -; should fail because it makes some paths (%a) potentially more expensive due -; to multiple uses of the immediate. Additionally, when we go to speculate the -; instructions, their cost will also be too high. -; FIXME: The goal is really to test the first property, but there doesn't -; happen to be any way to use free-to-speculate instructions here so that it -; would be the only interesting property. -define i64 @test_expensive_imm(i32 %flag, i64 %arg) { -; CHECK-LABEL: @test_expensive_imm( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 [[FLAG:%.*]], label [[A:%.*]] [ -; CHECK-NEXT: i32 1, label [[B:%.*]] -; CHECK-NEXT: i32 2, label [[C:%.*]] -; CHECK-NEXT: i32 3, label [[D:%.*]] -; CHECK-NEXT: ] -; CHECK: a: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: c: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: d: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i64 [ 4294967296, [[A]] ], [ 1, [[B]] ], [ 1, [[C]] ], [ 1, [[D]] ] -; CHECK-NEXT: [[SUM1:%.*]] = add i64 [[ARG:%.*]], [[P]] -; CHECK-NEXT: [[SUM2:%.*]] = add i64 [[SUM1]], [[P]] -; CHECK-NEXT: ret i64 [[SUM2]] -; -entry: - switch i32 %flag, label %a [ - i32 1, label %b - i32 2, label %c - i32 3, label %d - ] - -a: - br label %exit - -b: - br label %exit - -c: - br label %exit - -d: - br label %exit - -exit: - %p = phi i64 [ 4294967296, %a ], [ 1, %b ], [ 1, %c ], [ 1, %d ] - %sum1 = add i64 %arg, %p - %sum2 = add i64 %sum1, %p - ret i64 %sum2 -} - -define i32 @test_no_spec_non_postdominating_uses(i1 %flag1, i1 %flag2, i32 %arg) { -; CHECK-LABEL: @test_no_spec_non_postdominating_uses( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG1:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM1_0:%.*]] = add i32 [[ARG:%.*]], 7 -; CHECK-NEXT: br label [[MERGE:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM1_1:%.*]] = add i32 [[ARG]], 11 -; CHECK-NEXT: br label [[MERGE]] -; CHECK: merge: -; CHECK-NEXT: [[SUM1_PHI:%.*]] = phi i32 [ [[SUM1_0]], [[A]] ], [ [[SUM1_1]], [[B]] ] -; CHECK-NEXT: [[P2:%.*]] = phi i32 [ 13, [[A]] ], [ 42, [[B]] ] -; CHECK-NEXT: br i1 [[FLAG2:%.*]], label [[EXIT1:%.*]], label [[EXIT2:%.*]] -; CHECK: exit1: -; CHECK-NEXT: ret i32 [[SUM1_PHI]] -; CHECK: exit2: -; CHECK-NEXT: [[SUM2:%.*]] = add i32 [[ARG]], [[P2]] -; CHECK-NEXT: ret i32 [[SUM2]] -; -entry: - br i1 %flag1, label %a, label %b - -a: - br label %merge - -b: - br label %merge - -merge: - %p1 = phi i32 [ 7, %a ], [ 11, %b ] - %p2 = phi i32 [ 13, %a ], [ 42, %b ] - %sum1 = add i32 %arg, %p1 - br i1 %flag2, label %exit1, label %exit2 - -exit1: - ret i32 %sum1 - -exit2: - %sum2 = add i32 %arg, %p2 - ret i32 %sum2 -} diff --git a/llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll b/llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll deleted file mode 100644 --- a/llvm/test/Transforms/SpeculateAroundPHIs/convergent.ll +++ /dev/null @@ -1,98 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=spec-phis < %s | FileCheck %s -; Make sure convergent and noduplicate calls aren't duplicated. - -declare i32 @llvm.convergent(i32) #0 -declare i32 @llvm.noduplicate(i32) #1 -declare i32 @llvm.regular(i32) #2 - -define i32 @test_convergent(i1 %flag, i32 %arg) #0 { -; CHECK-LABEL: @test_convergent( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] -; CHECK-NEXT: [[SUM:%.*]] = call i32 @llvm.convergent(i32 [[P]]) -; CHECK-NEXT: ret i32 [[SUM]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = call i32 @llvm.convergent(i32 %p) - ret i32 %sum -} - -define i32 @test_noduplicate(i1 %flag, i32 %arg) #1 { -; CHECK-LABEL: @test_noduplicate( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 7, [[A]] ], [ 11, [[B]] ] -; CHECK-NEXT: [[SUM:%.*]] = call i32 @llvm.noduplicate(i32 [[P]]) -; CHECK-NEXT: ret i32 [[SUM]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = call i32 @llvm.noduplicate(i32 %p) - ret i32 %sum -} - -; Otherwise identical function which should be transformed. -define i32 @test_reference(i1 %flag, i32 %arg) #2 { -; CHECK-LABEL: @test_reference( -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[A:%.*]], label [[B:%.*]] -; CHECK: a: -; CHECK-NEXT: [[SUM_0:%.*]] = call i32 @llvm.regular(i32 7) -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: b: -; CHECK-NEXT: [[SUM_1:%.*]] = call i32 @llvm.regular(i32 11) -; CHECK-NEXT: br label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: [[SUM_PHI:%.*]] = phi i32 [ [[SUM_0]], [[A]] ], [ [[SUM_1]], [[B]] ] -; CHECK-NEXT: ret i32 [[SUM_PHI]] -; -entry: - br i1 %flag, label %a, label %b - -a: - br label %exit - -b: - br label %exit - -exit: - %p = phi i32 [ 7, %a ], [ 11, %b ] - %sum = call i32 @llvm.regular(i32 %p) - ret i32 %sum -} - - -attributes #0 = { nounwind readnone convergent speculatable } -attributes #1 = { nounwind readnone noduplicate speculatable } -attributes #2 = { nounwind readnone speculatable } diff --git a/llvm/test/Transforms/SpeculateAroundPHIs/pr42991.ll b/llvm/test/Transforms/SpeculateAroundPHIs/pr42991.ll deleted file mode 100644 --- a/llvm/test/Transforms/SpeculateAroundPHIs/pr42991.ll +++ /dev/null @@ -1,44 +0,0 @@ -; RUN: opt -S -passes=spec-phis %s - -; This testcase crashes during the speculate around PHIs pass. The pass however -; results in no changes. - -define i32 @test1() { -entry: - callbr void asm sideeffect "", "X,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test1, %return), i8* blockaddress(@test1, %f)) - to label %asm.fallthrough [label %return, label %f] - -asm.fallthrough: - br label %return - -f: - br label %return - -return: - %retval.0 = phi i32 [ 0, %f ], [ 1, %asm.fallthrough ], [ 1, %entry ] - ret i32 %retval.0 -} - -define void @test2() { -entry: - br label %tailrecurse - -tailrecurse: - %call = tail call i32 @test3() - %tobool1 = icmp eq i32 %call, 0 - callbr void asm sideeffect "", "X,X,~{dirflag},~{fpsr},~{flags}"(i8* blockaddress(@test2, %test1.exit), i8* blockaddress(@test2, %f.i)) - to label %if.end6 [label %test1.exit, label %f.i] - -f.i: - br label %test1.exit - -test1.exit: - %retval.0.i = phi i1 [ false, %f.i ], [ true, %tailrecurse ] - %brmerge = or i1 %tobool1, %retval.0.i - br i1 %brmerge, label %if.end6, label %tailrecurse - -if.end6: - ret void -} - -declare i32 @test3() diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Scalar/BUILD.gn @@ -83,7 +83,6 @@ "SimpleLoopUnswitch.cpp", "SimplifyCFGPass.cpp", "Sink.cpp", - "SpeculateAroundPHIs.cpp", "SpeculativeExecution.cpp", "StraightLineStrengthReduce.cpp", "StructurizeCFG.cpp",