diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -111,7 +111,7 @@ // // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. // -FunctionPass *createSROAPass(); +FunctionPass *createSROAPass(bool PreserveCFG = true); //===----------------------------------------------------------------------===// // diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h --- a/llvm/include/llvm/Transforms/Scalar/SROA.h +++ b/llvm/include/llvm/Transforms/Scalar/SROA.h @@ -1,4 +1,4 @@ -//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===// +//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,17 +15,21 @@ #ifndef LLVM_TRANSFORMS_SCALAR_SROA_H #define LLVM_TRANSFORMS_SCALAR_SROA_H +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" +#include #include namespace llvm { class AllocaInst; +class LoadInst; class AssumptionCache; class DominatorTree; +class DomTreeUpdater; class Function; class LLVMContext; class PHINode; @@ -41,8 +45,31 @@ class Partition; class SROALegacyPass; +class SelectHandSpeculativity { + unsigned char Storage = 0; + using TrueVal = Bitfield::Element; // Low 0'th bit. + using FalseVal = Bitfield::Element; // Low 1'th bit. +public: + SelectHandSpeculativity() = default; + SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal); + bool isSpeculatable(bool isTrueVal) const; + bool areAllSpeculatable() const; + bool areAnySpeculatable() const; + bool areNoneSpeculatable() const; + // For interop as int half of PointerIntPair. 
+ explicit operator intptr_t() const { return static_cast(Storage); } + explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {} +}; +static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char)); + +using PossiblySpeculatableLoad = + PointerIntPair; +using PossiblySpeculatableLoads = SmallVector; + } // end namespace sroa +enum class SROAOptions : bool { ModifyCFG, PreserveCFG }; + /// An optimization pass providing Scalar Replacement of Aggregates. /// /// This pass takes allocations which can be completely analyzed (that is, they @@ -63,8 +90,9 @@ /// SSA vector values. class SROAPass : public PassInfoMixin { LLVMContext *C = nullptr; - DominatorTree *DT = nullptr; + DomTreeUpdater *DTU = nullptr; AssumptionCache *AC = nullptr; + const bool PreserveCFG; /// Worklist of alloca instructions to simplify. /// @@ -98,27 +126,50 @@ /// All of these PHIs have been checked for the safety of speculation and by /// being speculated will allow promoting allocas currently in the promotable /// queue. - SetVector> SpeculatablePHIs; + SetVector> SpeculatablePHIs; - /// A worklist of select instructions to speculate prior to promoting + /// A worklist of select instructions to rewrite prior to promoting /// allocas. + SmallMapVector + SelectsToRewrite; + + /// Select instructions that use an alloca and are subsequently loaded can be + /// rewritten to load both input pointers and then select between the result, + /// allowing the load of the alloca to be promoted. + /// From this: + /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other + /// %V = load i32* %P2 + /// to: + /// %V1 = load i32* %Alloca -> will be mem2reg'd + /// %V2 = load i32* %Other + /// %V = select i1 %cond, i32 %V1, i32 %V2 /// - /// All of these select instructions have been checked for the safety of - /// speculation and by being speculated will allow promoting allocas - /// currently in the promotable queue. 
- SetVector> SpeculatableSelects; + /// We can do this to a select if its only uses are loads + /// and if either both operands of the select can be loaded unconditionally, + /// or if we are allowed to perform CFG modifications. + /// If there is an intervening bitcast whose single use is the load, + /// allow the promotion. + static std::optional + isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG); public: - SROAPass() = default; + /// If \p PreserveCFG is set, then the pass is not allowed to modify CFG + /// in any way, even if it would update CFG analyses. + SROAPass(SROAOptions PreserveCFG); /// Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); + private: friend class sroa::AllocaSliceRewriter; friend class sroa::SROALegacyPass; /// Helper used by both the public run method and by the legacy pass. + PreservedAnalyses runImpl(Function &F, DomTreeUpdater &RunDTU, + AssumptionCache &RunAC); PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, AssumptionCache &RunAC); @@ -126,7 +177,7 @@ AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, sroa::Partition &P); bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS); - bool runOnAlloca(AllocaInst &AI); + std::pair runOnAlloca(AllocaInst &AI); void clobberUse(Use &U); bool deleteDeadInstructions(SmallPtrSetImpl &DeletedAllocas); bool promoteAllocas(Function &F); diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -464,10 +464,13 @@ /// ElseBlock /// SplitBefore /// Tail +/// +/// Updates the dominator tree through \p DTU if given. 
void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, - MDNode *BranchWeights = nullptr); + MDNode *BranchWeights = nullptr, + DomTreeUpdater *DTU = nullptr); /// Check whether BB is the merge point of a if-region. /// If so, return the branch instruction that determines which entry into diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -836,6 +836,19 @@ return Result; } +Expected parseSROAOptions(StringRef Params) { + if (Params == "preserve-cfg") + return SROAOptions::PreserveCFG; + if (Params == "modify-cfg") + return SROAOptions::ModifyCFG; + return make_error( + formatv("invalid SROA pass parameter '{0}' (either preserve-cfg or " + "modify-cfg must be specified)", + Params) + .str(), + inconvertibleErrorCode()); +} + Expected parseStackLifetimeOptions(StringRef Params) { StackLifetime::LivenessType Result = StackLifetime::LivenessType::May; diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -328,7 +328,7 @@ // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. - FPM.addPass(SROAPass()); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); @@ -427,7 +427,7 @@ /*UseBlockFrequencyInfo=*/false)); // Delete small array after loop unroll. - FPM.addPass(SROAPass()); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); // Specially optimize memory movement as it doesn't look like dataflow in SSA. FPM.addPass(MemCpyOptPass()); @@ -478,7 +478,7 @@ // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. 
- FPM.addPass(SROAPass()); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); @@ -613,7 +613,7 @@ /*UseBlockFrequencyInfo=*/false)); // Delete small array after loop unroll. - FPM.addPass(SROAPass()); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); // Try vectorization/scalarization transforms that are both improvements // themselves and can allow further folds with GVN and InstCombine. @@ -714,7 +714,7 @@ CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; - FPM.addPass(SROAPass()); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( true))); // Merge & remove basic blocks. @@ -963,7 +963,7 @@ // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. EarlyFPM.addPass(LowerExpectIntrinsicPass()); EarlyFPM.addPass(SimplifyCFGPass()); - EarlyFPM.addPass(SROAPass()); + EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG)); EarlyFPM.addPass(EarlyCSEPass()); if (Level == OptimizationLevel::O3) EarlyFPM.addPass(CallSiteSplittingPass()); @@ -1113,7 +1113,10 @@ // Now that we are done with loop unrolling, be it either by LoopVectorizer, // or LoopUnroll passes, some variable-offset GEP's into alloca's could have // become constant-offset, thus enabling SROA and alloca promotion. Do so. - FPM.addPass(SROAPass()); + // NOTE: we are very late in the pipeline, and we don't have any LICM + // or SimplifyCFG passes scheduled after us that would clean up + // the CFG mess this may create if allowed to modify CFG, so forbid that. 
+ FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); } if (!IsFullLTO) { @@ -1204,7 +1207,10 @@ // Now that we are done with loop unrolling, be it either by LoopVectorizer, // or LoopUnroll passes, some variable-offset GEP's into alloca's could have // become constant-offset, thus enabling SROA and alloca promotion. Do so. - FPM.addPass(SROAPass()); + // NOTE: we are very late in the pipeline, and we don't have any LICM + // or SimplifyCFG passes scheduled after us that would clean up + // the CFG mess this may create if allowed to modify CFG, so forbid that. + FPM.addPass(SROAPass(SROAOptions::PreserveCFG)); FPM.addPass(InstCombinePass()); FPM.addPass( RequireAnalysisPass()); @@ -1745,7 +1751,7 @@ } // Break up allocas - FPM.addPass(SROAPass()); + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -376,7 +376,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) -FUNCTION_PASS("sroa", SROAPass()) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) @@ -473,6 +472,13 @@ "no-load-pre;load-pre;" "no-split-backedge-load-pre;split-backedge-load-pre;" "no-memdep;memdep") +FUNCTION_PASS_WITH_PARAMS("sroa", + "SROAPass", + [](SROAOptions PreserveCFG) { + return SROAPass(PreserveCFG); + }, + parseSROAOptions, + "preserve-cfg;modify-cfg") FUNCTION_PASS_WITH_PARAMS("print", "StackLifetimePrinterPass", [](StackLifetime::LivenessType Type) { diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- 
a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -38,6 +38,7 @@ #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" @@ -78,6 +79,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include @@ -104,6 +106,8 @@ STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); +STATISTIC(NumLoadsPredicated, + "Number of loads rewritten into predicated loads to allow promotion"); STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); @@ -111,7 +115,6 @@ /// GEPs. static cl::opt SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden); - namespace { /// A custom IRBuilder inserter which prefixes all names, but only in @@ -1306,24 +1309,53 @@ PN.eraseFromParent(); } -/// Select instructions that use an alloca and are subsequently loaded can be -/// rewritten to load both input pointers and then select between the result, -/// allowing the load of the alloca to be promoted. -/// From this: -/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other -/// %V = load i32* %P2 -/// to: -/// %V1 = load i32* %Alloca -> will be mem2reg'd -/// %V2 = load i32* %Other -/// %V = select i1 %cond, i32 %V1, i32 %V2 -/// -/// We can do this to a select if its only uses are loads and if the operand -/// to the select can be loaded unconditionally. 
If found an intervening bitcast -/// with a single use of the load, allow the promotion. -static bool isSafeSelectToSpeculate(SelectInst &SI) { - Value *TValue = SI.getTrueValue(); - Value *FValue = SI.getFalseValue(); +sroa::SelectHandSpeculativity & +sroa::SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) { + if (isTrueVal) + Bitfield::set(Storage, true); + else + Bitfield::set(Storage, true); + return *this; +} + +bool sroa::SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const { + return isTrueVal + ? Bitfield::get(Storage) + : Bitfield::get(Storage); +} + +bool sroa::SelectHandSpeculativity::areAllSpeculatable() const { + return isSpeculatable(/*isTrueVal=*/true) && + isSpeculatable(/*isTrueVal=*/false); +} + +bool sroa::SelectHandSpeculativity::areAnySpeculatable() const { + return isSpeculatable(/*isTrueVal=*/true) || + isSpeculatable(/*isTrueVal=*/false); +} +bool sroa::SelectHandSpeculativity::areNoneSpeculatable() const { + return !areAnySpeculatable(); +} + +static sroa::SelectHandSpeculativity +isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) { + assert(LI.isSimple() && "Only for simple loads"); + sroa::SelectHandSpeculativity Spec; + const DataLayout &DL = SI.getModule()->getDataLayout(); + for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()}) + if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL, + &LI)) + Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue()); + else if (PreserveCFG) + return Spec; + + return Spec; +} + +std::optional +SROAPass::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) { + PossiblySpeculatableLoads Loads; for (User *U : SI.users()) { LoadInst *LI; @@ -1333,75 +1365,136 @@ else LI = dyn_cast(U); - if (!LI || !LI->isSimple()) - return false; + // Note that atomic loads can be transformed; + // atomic semantics do not have any meaning for a local alloca. + if (!LI || LI->isVolatile()) + return {}; // Give up on this `select`. 
- // Both operands to the select need to be dereferenceable, either - // absolutely (e.g. allocas) or at this point because we can see other - // accesses to it. - if (!isSafeToLoadUnconditionally(TValue, LI->getType(), - LI->getAlign(), DL, LI)) - return false; - if (!isSafeToLoadUnconditionally(FValue, LI->getType(), - LI->getAlign(), DL, LI)) - return false; + PossiblySpeculatableLoad Load(LI); + + if (!LI->isSimple()) { + // If the `load` is not simple, we can't speculatively execute it, + // but we could handle this via a CFG modification. But can we? + if (PreserveCFG) + return {}; // Give up on this `select`. + Loads.emplace_back(Load); + continue; + } + + sroa::SelectHandSpeculativity Spec = + isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG); + if (PreserveCFG && !Spec.areAllSpeculatable()) + return {}; // Give up on this `select`. + + Load.setInt(Spec); + Loads.emplace_back(Load); } - return true; + return Loads; } -static void speculateSelectInstLoads(IRBuilderTy &IRB, SelectInst &SI) { - LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); +static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, + IRBuilderTy &IRB) { + LLVM_DEBUG(dbgs() << " original load: " << SI << "\n"); IRB.SetInsertPoint(&SI); Value *TV = SI.getTrueValue(); Value *FV = SI.getFalseValue(); - // Replace the loads of the select with a select of two loads. - while (!SI.use_empty()) { - LoadInst *LI; - BitCastInst *BC = dyn_cast(SI.user_back()); - if (BC) { - assert(BC->hasOneUse() && "Bitcast should have a single use."); - LI = cast(BC->user_back()); - } else { - LI = cast(SI.user_back()); - } + // Replace the given load of the select with a select of two loads. - assert(LI->isSimple() && "We only speculate simple loads"); + assert(LI.isSimple() && "We only speculate simple loads"); - IRB.SetInsertPoint(LI); - Value *NewTV = - BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast") - : TV; - Value *NewFV = - BC ? 
IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast") - : FV; - LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV, - LI->getName() + ".sroa.speculate.load.true"); - LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV, - LI->getName() + ".sroa.speculate.load.false"); - NumLoadsSpeculated += 2; + IRB.SetInsertPoint(&LI); + LoadInst *TL = + IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(), + LI.getName() + ".sroa.speculate.load.true"); + LoadInst *FL = + IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(), + LI.getName() + ".sroa.speculate.load.false"); + NumLoadsSpeculated += 2; - // Transfer alignment and AA info if present. - TL->setAlignment(LI->getAlign()); - FL->setAlignment(LI->getAlign()); + // Transfer alignment and AA info if present. + TL->setAlignment(LI.getAlign()); + FL->setAlignment(LI.getAlign()); - AAMDNodes Tags = LI->getAAMetadata(); - if (Tags) { - TL->setAAMetadata(Tags); - FL->setAAMetadata(Tags); - } - - Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, - LI->getName() + ".sroa.speculated"); - - LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); - LI->replaceAllUsesWith(V); - LI->eraseFromParent(); - if (BC) - BC->eraseFromParent(); + AAMDNodes Tags = LI.getAAMetadata(); + if (Tags) { + TL->setAAMetadata(Tags); + FL->setAAMetadata(Tags); } + + Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, + LI.getName() + ".sroa.speculated"); + + LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n"); + LI.replaceAllUsesWith(V); +} + +static void rewriteLoadOfSelect(SelectInst &SI, LoadInst &LI, + sroa::SelectHandSpeculativity Spec, + DomTreeUpdater &DTU) { + LLVM_DEBUG(dbgs() << " original load: " << SI << "\n"); + BasicBlock *Head = LI.getParent(); + Instruction *ThenTerm = nullptr; + Instruction *ElseTerm = nullptr; + if (Spec.areNoneSpeculatable()) + SplitBlockAndInsertIfThenElse(SI.getCondition(), &LI, &ThenTerm, &ElseTerm, + SI.getMetadata(LLVMContext::MD_prof), &DTU); + else { + 
SplitBlockAndInsertIfThen(SI.getCondition(), &LI, /*Unreachable=*/false, + SI.getMetadata(LLVMContext::MD_prof), &DTU, + /*LI=*/nullptr, /*ThenBlock=*/nullptr); + if (Spec.isSpeculatable(/*isTrueVal=*/true)) + cast(Head->getTerminator())->swapSuccessors(); + } + auto *HeadBI = cast(Head->getTerminator()); + Spec = {}; // Do not use `Spec` beyond this point. + BasicBlock *Tail = LI.getParent(); + Tail->setName(Head->getName() + ".cont"); + auto *PN = PHINode::Create(LI.getType(), 2, "", &LI); + for (BasicBlock *SuccBB : successors(Head)) { + bool IsThen = SuccBB == HeadBI->getSuccessor(0); + int SuccIdx = IsThen ? 0 : 1; + auto *NewLoadBB = SuccBB == Tail ? Head : SuccBB; + if (NewLoadBB != Head) { + NewLoadBB->setName(Head->getName() + (IsThen ? ".then" : ".else")); + ++NumLoadsPredicated; + } else + ++NumLoadsSpeculated; + auto *CondLoad = cast(LI.clone()); + CondLoad->insertBefore(NewLoadBB->getTerminator()); + CondLoad->setOperand(0, SI.getOperand(1 + SuccIdx)); + CondLoad->setName(LI.getName() + (IsThen ? 
".then" : ".else") + ".val"); + PN->addIncoming(CondLoad, NewLoadBB); + } + PN->takeName(&LI); + LLVM_DEBUG(dbgs() << " to: " << *PN << "\n"); + LI.replaceAllUsesWith(PN); +} + +static bool rewriteSelectInstLoads(SelectInst &SI, + const sroa::PossiblySpeculatableLoads &Loads, + IRBuilderTy &IRB, DomTreeUpdater *DTU) { + bool CFGChanged = false; + LLVM_DEBUG(dbgs() << " original select: " << SI << "\n"); + + for (const PossiblySpeculatableLoad &Load : Loads) { + LoadInst *LI = Load.getPointer(); + sroa::SelectHandSpeculativity Spec = Load.getInt(); + if (Spec.areAllSpeculatable()) { + speculateSelectInstLoads(SI, *LI, IRB); + } else { + assert(DTU && "Should not get here when not allowed to modify the CFG!"); + rewriteLoadOfSelect(SI, *LI, Spec, *DTU); + CFGChanged = true; + } + LI->eraseFromParent(); + } + + for (User *U : make_early_inc_range(SI.users())) + cast(U)->eraseFromParent(); SI.eraseFromParent(); + return CFGChanged; } /// Build a GEP out of a base pointer and indices. @@ -4397,13 +4490,21 @@ break; } - for (SelectInst *Sel : SelectUsers) - if (!isSafeSelectToSpeculate(*Sel)) { + SmallVector, 2> + NewSelectsToRewrite; + NewSelectsToRewrite.reserve(SelectUsers.size()); + for (SelectInst *Sel : SelectUsers) { + std::optional Loads = + isSafeSelectToSpeculate(*Sel, PreserveCFG); + if (!Loads) { Promotable = false; PHIUsers.clear(); SelectUsers.clear(); + NewSelectsToRewrite.clear(); break; } + NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Loads)); + } if (Promotable) { for (Use *U : AS.getDeadUsesIfPromotable()) { @@ -4422,8 +4523,12 @@ // next iteration. 
for (PHINode *PHIUser : PHIUsers) SpeculatablePHIs.insert(PHIUser); - for (SelectInst *SelectUser : SelectUsers) - SpeculatableSelects.insert(SelectUser); + SelectsToRewrite.reserve(SelectsToRewrite.size() + + NewSelectsToRewrite.size()); + for (auto &&KV : llvm::make_range( + std::make_move_iterator(NewSelectsToRewrite.begin()), + std::make_move_iterator(NewSelectsToRewrite.end()))) + SelectsToRewrite.insert(std::move(KV)); Worklist.insert(NewAI); } } else { @@ -4637,14 +4742,19 @@ /// This analyzes the alloca to ensure we can reason about it, builds /// the slices of the alloca, and then hands it off to be split and /// rewritten as needed. -bool SROAPass::runOnAlloca(AllocaInst &AI) { +std::pair +SROAPass::runOnAlloca(AllocaInst &AI) { + bool Changed = false; + bool CFGChanged = false; + LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); ++NumAllocasAnalyzed; // Special case dead allocas, as they're trivial. if (AI.use_empty()) { AI.eraseFromParent(); - return true; + Changed = true; + return {Changed, CFGChanged}; } const DataLayout &DL = AI.getModule()->getDataLayout(); @@ -4652,9 +4762,7 @@ auto *AT = AI.getAllocatedType(); if (AI.isArrayAllocation() || !AT->isSized() || isa(AT) || DL.getTypeAllocSize(AT).getFixedSize() == 0) - return false; - - bool Changed = false; + return {Changed, CFGChanged}; // First, split any FCA loads and stores touching this alloca to promote // better splitting and promotion opportunities. @@ -4666,7 +4774,7 @@ AllocaSlices AS(DL, AI); LLVM_DEBUG(AS.print(dbgs())); if (AS.isEscaped()) - return Changed; + return {Changed, CFGChanged}; // Delete all the dead users of this alloca before splitting and rewriting it. for (Instruction *DeadUser : AS.getDeadUsers()) { @@ -4688,7 +4796,7 @@ // No slices to split. Leave the dead alloca for a later pass to clean up. 
if (AS.begin() == AS.end()) - return Changed; + return {Changed, CFGChanged}; Changed |= splitAlloca(AI, AS); @@ -4696,11 +4804,15 @@ while (!SpeculatablePHIs.empty()) speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val()); - LLVM_DEBUG(dbgs() << " Speculating Selects\n"); - while (!SpeculatableSelects.empty()) - speculateSelectInstLoads(IRB, *SpeculatableSelects.pop_back_val()); + LLVM_DEBUG(dbgs() << " Rewriting Selects\n"); + auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector(); + while (!RemainingSelectsToRewrite.empty()) { + const auto [K, V] = RemainingSelectsToRewrite.pop_back_val(); + CFGChanged |= + rewriteSelectInstLoads(*K, V, IRB, PreserveCFG ? nullptr : DTU); + } - return Changed; + return {Changed, CFGChanged}; } /// Delete the dead instructions accumulated in this run. @@ -4759,16 +4871,16 @@ NumPromoted += PromotableAllocas.size(); LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, AC); + PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC); PromotableAllocas.clear(); return true; } -PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT, +PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU, AssumptionCache &RunAC) { LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); - DT = &RunDT; + DTU = &RunDTU; AC = &RunAC; BasicBlock &EntryBB = F.getEntryBlock(); @@ -4785,13 +4897,18 @@ } bool Changed = false; + bool CFGChanged = false; // A set of deleted alloca instruction pointers which should be removed from // the list of promotable allocas. 
SmallPtrSet DeletedAllocas; do { while (!Worklist.empty()) { - Changed |= runOnAlloca(*Worklist.pop_back_val()); + auto [IterationChanged, IterationCFGChanged] = + runOnAlloca(*Worklist.pop_back_val()); + Changed |= IterationChanged; + CFGChanged |= IterationCFGChanged; + Changed |= deleteDeadInstructions(DeletedAllocas); // Remove the deleted allocas from various lists so that we don't try to @@ -4811,19 +4928,41 @@ PostPromotionWorklist.clear(); } while (!Worklist.empty()); + assert((!CFGChanged || Changed) && "Can not only modify the CFG."); + assert((!CFGChanged || !PreserveCFG) && + "Should not have modified the CFG when told to preserve it."); + if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; - PA.preserveSet(); + if (!CFGChanged) + PA.preserveSet(); + PA.preserve(); return PA; } +PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC) { + DomTreeUpdater DTU(RunDT, DomTreeUpdater::UpdateStrategy::Lazy); + return runImpl(F, DTU, RunAC); +} + PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) { return runImpl(F, AM.getResult(F), AM.getResult(F)); } +void SROAPass::printPipeline( + raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + OS << (PreserveCFG ? "" : ""); +} + +SROAPass::SROAPass(SROAOptions PreserveCFG_) + : PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {} + /// A legacy pass for the legacy pass manager that wraps the \c SROA pass. 
/// /// This is in the llvm namespace purely to allow it to be a friend of the \c @@ -4835,7 +4974,8 @@ public: static char ID; - SROALegacyPass() : FunctionPass(ID) { + SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG) + : FunctionPass(ID), Impl(PreserveCFG) { initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); } @@ -4853,7 +4993,7 @@ AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.setPreservesCFG(); + AU.addPreserved(); } StringRef getPassName() const override { return "SROA"; } @@ -4861,7 +5001,10 @@ char SROALegacyPass::ID = 0; -FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); } +FunctionPass *llvm::createSROAPass(bool PreserveCFG) { + return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG + : SROAOptions::ModifyCFG); +} INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -1545,8 +1545,14 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, - MDNode *BranchWeights) { + MDNode *BranchWeights, + DomTreeUpdater *DTU) { BasicBlock *Head = SplitBefore->getParent(); + + SmallPtrSet UniqueOrigSuccessors; + if (DTU) + UniqueOrigSuccessors.insert(succ_begin(Head), succ_end(Head)); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); Instruction *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); @@ -1560,6 +1566,19 @@ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond); HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights); ReplaceInstWithInst(HeadOldTerm, HeadNewTerm); + if (DTU) { + SmallVector Updates; + Updates.reserve(4 + 2 * UniqueOrigSuccessors.size()); + for (BasicBlock *Succ : successors(Head)) { + 
Updates.push_back({DominatorTree::Insert, Head, Succ}); + Updates.push_back({DominatorTree::Insert, Succ, Tail}); + } + for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors) + Updates.push_back({DominatorTree::Insert, Tail, UniqueOrigSuccessor}); + for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors) + Updates.push_back({DominatorTree::Delete, Head, UniqueOrigSuccessor}); + DTU->applyUpdates(Updates); + } } BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, diff --git a/llvm/test/Analysis/MemorySSA/pr39197.ll b/llvm/test/Analysis/MemorySSA/pr39197.ll --- a/llvm/test/Analysis/MemorySSA/pr39197.ll +++ b/llvm/test/Analysis/MemorySSA/pr39197.ll @@ -1,4 +1,4 @@ -; RUN: opt -mtriple=s390x-linux-gnu -mcpu=z13 -verify-memoryssa -passes='function(sroa),globalopt,function-attrs,function(simplifycfg,loop-mssa(licm),loop(simple-loop-unswitch))' %s -S | FileCheck %s +; RUN: opt -mtriple=s390x-linux-gnu -mcpu=z13 -verify-memoryssa -passes='function(sroa),globalopt,function-attrs,function(simplifycfg,loop-mssa(licm),loop(simple-loop-unswitch))' %s -S | FileCheck %s ; REQUIRES: asserts target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" diff --git a/llvm/test/CodeGen/AMDGPU/unroll.ll b/llvm/test/CodeGen/AMDGPU/unroll.ll --- a/llvm/test/CodeGen/AMDGPU/unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/unroll.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -passes='loop-unroll,simplifycfg,sroa' %s -S -o - | FileCheck %s -; RUN: opt -mtriple=r600-- -passes='loop-unroll,simplifycfg,sroa' %s -S -o - | FileCheck %s +; RUN: opt -mtriple=amdgcn-- -passes='loop-unroll,simplifycfg,sroa' %s -S -o - | FileCheck %s +; RUN: opt -mtriple=r600-- -passes='loop-unroll,simplifycfg,sroa' %s -S -o - | FileCheck %s target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" diff --git 
a/llvm/test/CodeGen/AMDGPU/vector-alloca-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-addrspacecast.ll --- a/llvm/test/CodeGen/AMDGPU/vector-alloca-addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-addrspacecast.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s ; Should give up promoting alloca to vector with an addrspacecast. diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-atomic.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-atomic.ll --- a/llvm/test/CodeGen/AMDGPU/vector-alloca-atomic.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-atomic.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s ; Show that what the alloca promotion pass will do for non-atomic load/store. 
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll --- a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-ALLOCA %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-PROMOTE %s -; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s target datalayout = "A5" diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll --- a/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-limits.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s +; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s target datalayout = "A5" diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll --- a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll @@ -3,8 +3,8 @@ ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < 
%s | FileCheck -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC %s ; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s -; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s -; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s target datalayout = "A5" ; OPT-LABEL: @vector_read( diff --git a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll --- a/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll +++ b/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll @@ -5,7 +5,7 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ ; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-BE -; RUN: opt --passes='sroa,loop-vectorize,loop-unroll,instcombine' -S \ +; RUN: opt --passes='sroa,loop-vectorize,loop-unroll,instcombine' -S \ ; RUN: -vectorizer-maximize-bandwidth --mtriple=powerpc64le-- -mcpu=pwr10 < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-OPT diff --git a/llvm/test/CodeGen/X86/vec_ins_extract.ll b/llvm/test/CodeGen/X86/vec_ins_extract.ll --- a/llvm/test/CodeGen/X86/vec_ins_extract.ll +++ b/llvm/test/CodeGen/X86/vec_ins_extract.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: opt < %s -passes='sroa,instcombine' | \ +; RUN: opt < %s -passes='sroa,instcombine' | \ ; RUN: llc -mtriple=i686-- -mcpu=yonah | not grep sub.*esp ; This checks that various insert/extract idiom work without going 
to the diff --git a/llvm/test/DebugInfo/ARM/sroa-complex.ll b/llvm/test/DebugInfo/ARM/sroa-complex.ll --- a/llvm/test/DebugInfo/ARM/sroa-complex.ll +++ b/llvm/test/DebugInfo/ARM/sroa-complex.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes='sroa' -S -o - %s | FileCheck %s +; RUN: opt -passes='sroa' -S -o - %s | FileCheck %s target datalayout = "e-m:o-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv7-apple-unknown-macho" diff --git a/llvm/test/DebugInfo/Generic/sroa-larger.ll b/llvm/test/DebugInfo/Generic/sroa-larger.ll --- a/llvm/test/DebugInfo/Generic/sroa-larger.ll +++ b/llvm/test/DebugInfo/Generic/sroa-larger.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes='sroa' -S -o - %s | FileCheck %s +; RUN: opt -passes='sroa' -S -o - %s | FileCheck %s ; Generated from clang -c -O2 -g -target x86_64-pc-windows-msvc ; struct A { ; int _Myval2; diff --git a/llvm/test/DebugInfo/Generic/sroa-samesize.ll b/llvm/test/DebugInfo/Generic/sroa-samesize.ll --- a/llvm/test/DebugInfo/Generic/sroa-samesize.ll +++ b/llvm/test/DebugInfo/Generic/sroa-samesize.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes='sroa' -S -o - %s | FileCheck %s +; RUN: opt -passes='sroa' -S -o - %s | FileCheck %s ; Generated from clang -c -O2 -g -target x86_64-pc-windows-msvc ; struct A { double x1[]; }; ; struct x2 { diff --git a/llvm/test/DebugInfo/X86/sroa-after-inlining.ll b/llvm/test/DebugInfo/X86/sroa-after-inlining.ll --- a/llvm/test/DebugInfo/X86/sroa-after-inlining.ll +++ b/llvm/test/DebugInfo/X86/sroa-after-inlining.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -passes='cgscc(function(sroa,instcombine),inline),function(instcombine,sroa),verify' -S -o - | FileCheck %s +; RUN: opt %s -passes='cgscc(function(sroa,instcombine),inline),function(instcombine,sroa),verify' -S -o - | FileCheck %s ; ; This test checks that SROA pass processes debug info correctly if applied twice. 
; Specifically, after SROA works first time, instcombine converts dbg.declare diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll b/llvm/test/DebugInfo/X86/sroasplit-1.ll --- a/llvm/test/DebugInfo/X86/sroasplit-1.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s +; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s ; ; Test that we can partial emit debug info for aggregates repeatedly ; split up by SROA. diff --git a/llvm/test/DebugInfo/X86/sroasplit-2.ll b/llvm/test/DebugInfo/X86/sroasplit-2.ll --- a/llvm/test/DebugInfo/X86/sroasplit-2.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-2.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s +; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s ; ; Test that we can partial emit debug info for aggregates repeatedly ; split up by SROA. diff --git a/llvm/test/DebugInfo/X86/sroasplit-3.ll b/llvm/test/DebugInfo/X86/sroasplit-3.ll --- a/llvm/test/DebugInfo/X86/sroasplit-3.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-3.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s +; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s ; ModuleID = 'test.c' ; Test that SROA updates the debug info correctly if an alloca was rewritten but ; not partitioned into multiple allocas. diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll b/llvm/test/DebugInfo/X86/sroasplit-4.ll --- a/llvm/test/DebugInfo/X86/sroasplit-4.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes='sroa' < %s -S -o - | FileCheck %s +; RUN: opt -passes='sroa' < %s -S -o - | FileCheck %s ; ; Test that recursively splitting an alloca updates the debug info correctly. 
; CHECK: %[[T:.*]] = load i64, i64* @t, align 8 diff --git a/llvm/test/DebugInfo/X86/sroasplit-5.ll b/llvm/test/DebugInfo/X86/sroasplit-5.ll --- a/llvm/test/DebugInfo/X86/sroasplit-5.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-5.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s +; RUN: opt %s -passes='sroa,verify' -S -o - | FileCheck %s ; From: ; struct prog_src_register { ; unsigned : 4; diff --git a/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll b/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll --- a/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -passes='sroa' -o - %s | FileCheck %s +; RUN: opt -S -passes='sroa' -o - %s | FileCheck %s ; SROA should split the alloca in two new ones, each with its own dbg.declare. ; The original alloca and dbg.declare should be removed. diff --git a/llvm/test/DebugInfo/debugify-each.ll b/llvm/test/DebugInfo/debugify-each.ll --- a/llvm/test/DebugInfo/debugify-each.ll +++ b/llvm/test/DebugInfo/debugify-each.ll @@ -9,7 +9,7 @@ ; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS ; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS -; RUN: opt -debugify-each -passes='instrprof,instrprof,sroa,sccp' -S -o /dev/null < %s 2> %t +; RUN: opt -debugify-each -passes='instrprof,instrprof,sroa,sccp' -S -o /dev/null < %s 2> %t ; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS ; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS diff --git a/llvm/test/DebugInfo/salvage-overflow.ll b/llvm/test/DebugInfo/salvage-overflow.ll --- a/llvm/test/DebugInfo/salvage-overflow.ll +++ b/llvm/test/DebugInfo/salvage-overflow.ll @@ -1,4 +1,4 @@ -; RUN: opt %s -passes='sroa,early-cse' -S | FileCheck %s +; RUN: opt %s -passes='sroa,early-cse' -S | FileCheck %s ; CHECK: DIExpression(DW_OP_constu, 9223372036854775808, DW_OP_minus, DW_OP_stack_value) ; Created from the following C input (and then delta-reduced 
the IR): ; diff --git a/llvm/test/Transforms/ADCE/2016-09-06.ll b/llvm/test/Transforms/ADCE/2016-09-06.ll --- a/llvm/test/Transforms/ADCE/2016-09-06.ll +++ b/llvm/test/Transforms/ADCE/2016-09-06.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='sroa,adce' -adce-remove-loops -S | FileCheck %s +; RUN: opt < %s -passes='sroa,adce' -adce-remove-loops -S | FileCheck %s ; ModuleID = 'test1.bc' source_filename = "test1.c" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/AddDiscriminators/memcpy-discriminator.ll b/llvm/test/Transforms/AddDiscriminators/memcpy-discriminator.ll --- a/llvm/test/Transforms/AddDiscriminators/memcpy-discriminator.ll +++ b/llvm/test/Transforms/AddDiscriminators/memcpy-discriminator.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='add-discriminators,sroa' -S | FileCheck %s +; RUN: opt < %s -passes='add-discriminators,sroa' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s +; RUN: opt %s -passes='module(globalopt),cgscc(argpromotion),function(sroa)' -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" diff --git a/llvm/test/Transforms/Coroutines/coro-debug-O2.ll b/llvm/test/Transforms/Coroutines/coro-debug-O2.ll --- a/llvm/test/Transforms/Coroutines/coro-debug-O2.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-O2.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),function(sroa)' -S | FileCheck %s +; 
RUN: opt < %s -passes='module(coro-early),cgscc(coro-split),function(sroa)' -S | FileCheck %s ; Checks whether the dbg.declare for `__promise` remains valid under O2. diff --git a/llvm/test/Transforms/Inline/basictest.ll b/llvm/test/Transforms/Inline/basictest.ll --- a/llvm/test/Transforms/Inline/basictest.ll +++ b/llvm/test/Transforms/Inline/basictest.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -passes='inline,sroa' -S | FileCheck %s -; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s +; RUN: opt < %s -passes='inline,sroa' -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" define i32 @test1f(i32 %i) { diff --git a/llvm/test/Transforms/Inline/cgscc-cycle.ll b/llvm/test/Transforms/Inline/cgscc-cycle.ll --- a/llvm/test/Transforms/Inline/cgscc-cycle.ll +++ b/llvm/test/Transforms/Inline/cgscc-cycle.ll @@ -5,7 +5,7 @@ ; some out-of-band way to prevent infinitely re-inlining and re-transforming the ; code. ; -; RUN: opt < %s -passes='cgscc(inline,function(sroa,instcombine))' -inline-threshold=50 -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline,function(sroa,instcombine))' -inline-threshold=50 -S | FileCheck %s ; The `test1_*` collection of functions form a directly cycling pattern. 
diff --git a/llvm/test/Transforms/Inline/crash2.ll b/llvm/test/Transforms/Inline/crash2.ll --- a/llvm/test/Transforms/Inline/crash2.ll +++ b/llvm/test/Transforms/Inline/crash2.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes='inline,sroa' -max-devirt-iterations=1 -disable-output < %s +; RUN: opt -passes='inline,sroa' -max-devirt-iterations=1 -disable-output < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.3" diff --git a/llvm/test/Transforms/Inline/devirtualize-4.ll b/llvm/test/Transforms/Inline/devirtualize-4.ll --- a/llvm/test/Transforms/Inline/devirtualize-4.ll +++ b/llvm/test/Transforms/Inline/devirtualize-4.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='cgscc(devirt<4>(inline)),function(sroa,early-cse)' -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(devirt<4>(inline)),function(sroa,early-cse)' -S | FileCheck %s ; RUN: opt < %s -passes='default' -S | FileCheck %s ; Check that DoNotOptimize is inlined into Test. diff --git a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll --- a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll +++ b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -passes=instcombine -S | FileCheck --check-prefix=IC %s -; RUN: opt < %s -passes='instcombine,sroa' -S | FileCheck --check-prefix=IC_SROA %s +; RUN: opt < %s -passes='instcombine,sroa' -S | FileCheck --check-prefix=IC_SROA %s ; rdar://6417724 ; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it. 
diff --git a/llvm/test/Transforms/LICM/volatile-alias.ll b/llvm/test/Transforms/LICM/volatile-alias.ll --- a/llvm/test/Transforms/LICM/volatile-alias.ll +++ b/llvm/test/Transforms/LICM/volatile-alias.ll @@ -1,5 +1,5 @@ -; RUN: opt -passes='sroa,loop(loop-rotate)' %s | opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop-mssa(licm)' -S | FileCheck %s -; RUN: opt -passes='sroa,loop(loop-rotate),loop-mssa(licm)' -verify-memoryssa -S < %s | FileCheck %s +; RUN: opt -passes='sroa,loop(loop-rotate)' %s | opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop-mssa(licm)' -S | FileCheck %s +; RUN: opt -passes='sroa,loop(loop-rotate),loop-mssa(licm)' -verify-memoryssa -S < %s | FileCheck %s ; The objects *p and *q are aliased to each other, but even though *q is ; volatile, *p can be considered invariant in the loop. Check if it is moved ; out of the loop. diff --git a/llvm/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll b/llvm/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll --- a/llvm/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll +++ b/llvm/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='sroa,loop(loop-simplifycfg),loop-mssa(licm)' -disable-output -verify-dom-info -verify-loop-info +; RUN: opt < %s -passes='sroa,loop(loop-simplifycfg),loop-mssa(licm)' -disable-output -verify-dom-info -verify-loop-info define void @inflate() { entry: diff --git a/llvm/test/Transforms/SROA/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/SROA/2009-02-20-InstCombine-SROA.ll --- a/llvm/test/Transforms/SROA/2009-02-20-InstCombine-SROA.ll +++ b/llvm/test/Transforms/SROA/2009-02-20-InstCombine-SROA.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
--check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; rdar://6417724 @@ -270,3 +271,6 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 attributes #0 = { argmemonly nofree nosync nounwind willreturn } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll --- a/llvm/test/Transforms/SROA/address-spaces.ll +++ b/llvm/test/Transforms/SROA/address-spaces.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-p1:16:16:16-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture readonly, i32, i1) @@ -157,3 +158,6 @@ store i64 %v2, ptr addrspace(1) %complex2 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/addrspacecast.ll b/llvm/test/Transforms/SROA/addrspacecast.ll --- a/llvm/test/Transforms/SROA/addrspacecast.ll +++ b/llvm/test/Transforms/SROA/addrspacecast.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -288,12 +289,21 @@ } define void @select_addrspacecast_const_op(i1 %a, i1 %b) { -; CHECK-LABEL: @select_addrspacecast_const_op( -; CHECK-NEXT: [[C:%.*]] = alloca i64, align 8 -; CHECK-NEXT: [[C_0_ASC_SROA_CAST:%.*]] = addrspacecast ptr [[C]] to ptr addrspace(1) -; CHECK-NEXT: [[COND_IN:%.*]] = select i1 [[B:%.*]], ptr addrspace(1) [[C_0_ASC_SROA_CAST]], ptr addrspace(1) null -; CHECK-NEXT: [[COND:%.*]] = load i64, ptr addrspace(1) [[COND_IN]], align 8 -; CHECK-NEXT: ret void +; CHECK-preserve-cfg-LABEL: @select_addrspacecast_const_op( +; CHECK-preserve-cfg-NEXT: [[C:%.*]] = alloca i64, align 8 +; CHECK-preserve-cfg-NEXT: [[C_0_ASC_SROA_CAST:%.*]] = addrspacecast ptr [[C]] to ptr addrspace(1) +; CHECK-preserve-cfg-NEXT: [[COND_IN:%.*]] = select i1 [[B:%.*]], ptr addrspace(1) [[C_0_ASC_SROA_CAST]], ptr addrspace(1) null +; CHECK-preserve-cfg-NEXT: [[COND:%.*]] = load i64, ptr addrspace(1) [[COND_IN]], align 8 +; CHECK-preserve-cfg-NEXT: ret void +; +; CHECK-modify-cfg-LABEL: @select_addrspacecast_const_op( +; CHECK-modify-cfg-NEXT: br i1 [[B:%.*]], label [[DOTCONT:%.*]], label [[DOTELSE:%.*]] +; CHECK-modify-cfg: .else: +; CHECK-modify-cfg-NEXT: [[COND_ELSE_VAL:%.*]] = load i64, ptr addrspace(1) null, align 8 +;
CHECK-modify-cfg-NEXT: br label [[DOTCONT]] +; CHECK-modify-cfg: .cont: +; CHECK-modify-cfg-NEXT: [[COND:%.*]] = phi i64 [ undef, [[TMP0:%.*]] ], [ [[COND_ELSE_VAL]], [[DOTELSE]] ] +; CHECK-modify-cfg-NEXT: ret void ; %c = alloca i64, align 8 %p.0.c = select i1 %a, ptr %c, ptr %c diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s -; RUN: opt -passes='debugify,function(sroa)' -S < %s | FileCheck %s -check-prefix CHECK-DEBUGLOC +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg +; RUN: opt -passes='debugify,function(sroa)' -S < %s | FileCheck %s -check-prefix CHECK-DEBUGLOC target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -456,3 +457,6 @@ } declare void @populate(ptr) +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/alloca-address-space.ll b/llvm/test/Transforms/SROA/alloca-address-space.ll --- a/llvm/test/Transforms/SROA/alloca-address-space.ll +++ b/llvm/test/Transforms/SROA/alloca-address-space.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64-A2" declare void @llvm.memcpy.p2.p2.i32(ptr addrspace(2) nocapture, ptr addrspace(2) nocapture readonly, i32, i1) @@ -145,3 +146,6 @@ } declare void @llvm.lifetime.start.p0(i64 %size, ptr nocapture %ptr) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/assume.ll b/llvm/test/Transforms/SROA/assume.ll --- a/llvm/test/Transforms/SROA/assume.ll +++ b/llvm/test/Transforms/SROA/assume.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -27,3 +28,6 @@ declare void @llvm.assume(i1) #0 attributes #0 = { nofree norecurse nounwind willreturn } +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -1408,9 +1409,19 @@ } define void @PR15805(i1 %a, i1 %b) { -; CHECK-LABEL: @PR15805( -; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[B:%.*]], i64 undef, i64 undef -; CHECK-NEXT: ret void +; CHECK-preserve-cfg-LABEL: @PR15805( +; CHECK-preserve-cfg-NEXT: [[C:%.*]] = alloca i64, align 8 +; CHECK-preserve-cfg-NEXT: [[COND_IN:%.*]] = select i1 [[B:%.*]], ptr [[C]], ptr [[C]] +; CHECK-preserve-cfg-NEXT: [[COND:%.*]] = load i64, ptr [[COND_IN]], align 8 +; CHECK-preserve-cfg-NEXT: ret void +; +; CHECK-modify-cfg-LABEL: @PR15805( +; CHECK-modify-cfg-NEXT: br i1 [[B:%.*]], label [[DOTCONT:%.*]], label [[DOTELSE:%.*]] +; CHECK-modify-cfg: .else: +; CHECK-modify-cfg-NEXT: br label [[DOTCONT]] +; CHECK-modify-cfg: .cont: +; CHECK-modify-cfg-NEXT: [[COND:%.*]] = phi i64 [ undef, [[TMP0:%.*]] ], [ undef, [[DOTELSE]] ] +; CHECK-modify-cfg-NEXT: ret void ; %c = alloca i64, align 8 %p.0.c = select i1 %a, ptr %c, ptr %c @@ -1425,13 +1436,29 @@ ; order in which the uses of the alloca are visited.
; ; -; CHECK-LABEL: @PR15805.1( -; CHECK-NEXT: br label [[EXIT:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 [[A:%.*]], i64 undef, i64 undef -; CHECK-NEXT: br i1 [[C2:%.*]], label [[LOOP:%.*]], label [[EXIT]] -; CHECK: exit: -; CHECK-NEXT: ret void +; CHECK-preserve-cfg-LABEL: @PR15805.1( +; CHECK-preserve-cfg-NEXT: [[C:%.*]] = alloca i64, align 8 +; CHECK-preserve-cfg-NEXT: br label [[EXIT:%.*]] +; CHECK-preserve-cfg: loop: +; CHECK-preserve-cfg-NEXT: [[COND_IN:%.*]] = select i1 [[A:%.*]], ptr [[C]], ptr [[C]] +; CHECK-preserve-cfg-NEXT: [[COND:%.*]] = load i64, ptr [[COND_IN]], align 8 +; CHECK-preserve-cfg-NEXT: br i1 [[C2:%.*]], label [[LOOP:%.*]], label [[EXIT]] +; CHECK-preserve-cfg: exit: +; CHECK-preserve-cfg-NEXT: ret void +; +; CHECK-modify-cfg-LABEL: @PR15805.1( +; CHECK-modify-cfg-NEXT: br label [[EXIT:%.*]] +; CHECK-modify-cfg: loop: +; CHECK-modify-cfg-NEXT: [[C_0_LOAD:%.*]] = load i64, ptr poison, align 8 +; CHECK-modify-cfg-NEXT: br i1 [[A:%.*]], label [[LOOP_CONT:%.*]], label [[LOOP_ELSE:%.*]] +; CHECK-modify-cfg: loop.else: +; CHECK-modify-cfg-NEXT: [[C_0_LOAD1:%.*]] = load i64, ptr poison, align 8 +; CHECK-modify-cfg-NEXT: br label [[LOOP_CONT]] +; CHECK-modify-cfg: loop.cont: +; CHECK-modify-cfg-NEXT: [[COND:%.*]] = phi i64 [ [[C_0_LOAD]], [[LOOP:%.*]] ], [ [[C_0_LOAD1]], [[LOOP_ELSE]] ] +; CHECK-modify-cfg-NEXT: br i1 [[C2:%.*]], label [[LOOP]], label [[EXIT]] +; CHECK-modify-cfg: exit: +; CHECK-modify-cfg-NEXT: ret void ; %c = alloca i64, align 8 br label %exit @@ -1483,13 +1510,24 @@ ; bail on select instructions. 
; ; -; CHECK-LABEL: @PR16651.2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TV1_SROA_0:%.*]] = alloca <2 x float>, align 8 -; CHECK-NEXT: store <2 x float> [[VAL:%.*]], ptr [[TV1_SROA_0]], align 8 -; CHECK-NEXT: [[COND105_IN_I_I:%.*]] = select i1 [[C1:%.*]], ptr null, ptr [[TV1_SROA_0]] -; CHECK-NEXT: [[COND105_I_I:%.*]] = load float, ptr [[COND105_IN_I_I]], align 8 -; CHECK-NEXT: ret void +; CHECK-preserve-cfg-LABEL: @PR16651.2( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[TV1_SROA_0:%.*]] = alloca <2 x float>, align 8 +; CHECK-preserve-cfg-NEXT: store <2 x float> [[VAL:%.*]], ptr [[TV1_SROA_0]], align 8 +; CHECK-preserve-cfg-NEXT: [[COND105_IN_I_I:%.*]] = select i1 [[C1:%.*]], ptr null, ptr [[TV1_SROA_0]] +; CHECK-preserve-cfg-NEXT: [[COND105_I_I:%.*]] = load float, ptr [[COND105_IN_I_I]], align 8 +; CHECK-preserve-cfg-NEXT: ret void +; +; CHECK-modify-cfg-LABEL: @PR16651.2( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[TV1_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VAL:%.*]], i32 0 +; CHECK-modify-cfg-NEXT: br i1 [[C1:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_CONT:%.*]] +; CHECK-modify-cfg: entry.then: +; CHECK-modify-cfg-NEXT: [[COND105_I_I_THEN_VAL:%.*]] = load float, ptr null, align 8 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[COND105_I_I:%.*]] = phi float [ [[COND105_I_I_THEN_VAL]], [[ENTRY_THEN]] ], [ [[TV1_SROA_0_0_VEC_EXTRACT]], [[ENTRY:%.*]] ] +; CHECK-modify-cfg-NEXT: ret void ; entry: %tv1 = alloca { <2 x float>, <2 x float> }, align 8 diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll --- a/llvm/test/Transforms/SROA/big-endian.ll +++ b/llvm/test/Transforms/SROA/big-endian.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s
--check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -230,3 +231,6 @@ } declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/dbg-addr-diamond.ll b/llvm/test/Transforms/SROA/dbg-addr-diamond.ll --- a/llvm/test/Transforms/SROA/dbg-addr-diamond.ll +++ b/llvm/test/Transforms/SROA/dbg-addr-diamond.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -use-dbg-addr -passes=sroa -S < %s | FileCheck %s +; RUN: opt -use-dbg-addr -passes='sroa<preserve-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -use-dbg-addr -passes='sroa<modify-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; ModuleID = '' source_filename = "newvars.c" @@ -127,3 +128,6 @@ !52 = !{i64 0, i64 4, !53, i64 4, i64 4, !53} !53 = !{!31, !31, i64 0} !54 = !DILocation(line: 14, column: 1, scope: !8) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/dbg-inline.ll b/llvm/test/Transforms/SROA/dbg-inline.ll --- a/llvm/test/Transforms/SROA/dbg-inline.ll +++ b/llvm/test/Transforms/SROA/dbg-inline.ll @@ -2,7 +2,8 @@ ; Test that SROA can deal with allocas that have more than one ; dbg.declare hanging off of it.
-; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg source_filename = "/tmp/inlinesplit.cpp" target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.15.0" @@ -69,3 +70,6 @@ !24 = distinct !DILocation(line: 10, column: 10, scope: !8) !25 = !DILocation(line: 6, column: 12, scope: !22, inlinedAt: !24) !26 = !DILocation(line: 10, column: 3, scope: !8) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/dbg-single-piece.ll b/llvm/test/Transforms/SROA/dbg-single-piece.ll --- a/llvm/test/Transforms/SROA/dbg-single-piece.ll +++ b/llvm/test/Transforms/SROA/dbg-single-piece.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=sroa %s -S | FileCheck %s +; RUN: opt -passes='sroa<preserve-cfg>' %s -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -passes='sroa<modify-cfg>' %s -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %foo = type { [8 x i8], [8 x i8] } @@ -36,3 +37,6 @@ !7 = !DIExpression() !8 = !DILocation(line: 947, column: 35, scope: !2) !9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3) +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/dead-inst.ll b/llvm/test/Transforms/SROA/dead-inst.ll --- a/llvm/test/Transforms/SROA/dead-inst.ll +++ b/llvm/test/Transforms/SROA/dead-inst.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes='bdce,sroa<preserve-cfg>,bdce' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='bdce,sroa<modify-cfg>,bdce' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg + ; SROA fails to rewrite allocs but does rewrite some phis and delete ; dead instructions. Ensure that this invalidates analyses required ; for other passes. -; RUN: opt < %s -passes=bdce,sroa,bdce -o %t -debug-pass-manager 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-n32:64" target triple = "powerpc64le-grtev4-linux-gnu" @@ -13,6 +15,56 @@ ; Function Attrs: nounwind define void @H(ptr noalias nocapture readnone, [2 x i64], ptr %ptr, i32 signext %v, i64 %l, i64 %idx, ptr nonnull dereferenceable(32) %ptr2) { +; CHECK-LABEL: @H( +; CHECK-NEXT: [[TMP3:%.*]] = alloca [[CLASS_B:%.*]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue [2 x i64] [[TMP1:%.*]], 1 +; CHECK-NEXT: switch i64 [[TMP4]], label [[TMP6:%.*]] [ +; CHECK-NEXT: i64 4, label [[FOO:%.*]] +; CHECK-NEXT: i64 5, label [[TMP5:%.*]] +; CHECK-NEXT: ] +; CHECK: 5: +; CHECK-NEXT: br label [[TMP12:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP4]], 5 +; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP12]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr inttoptr (i64 4 to ptr), align 4 +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i8 [[TMP9]], 47 +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 5, i64 4 +; CHECK-NEXT: br label [[TMP12]] +; CHECK: 12: +; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ 4, [[TMP5]] ], [ [[TMP11]], [[TMP8]] ], [ 4, [[TMP6]] ] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP4]], 0 +;
CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i64 [[TMP4]], [[TMP13]] +; CHECK-NEXT: [[TMP16:%.*]] = and i1 [[TMP14]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[TMP17:%.*]], label [[A_EXIT:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = tail call ptr @memchr(ptr [[PTR:%.*]], i32 signext [[V:%.*]], i64 [[L:%.*]]) +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq ptr [[TMP18]], null +; CHECK-NEXT: [[TMP20:%.*]] = sext i1 [[TMP19]] to i64 +; CHECK-NEXT: br label [[A_EXIT]] +; CHECK: a.exit: +; CHECK-NEXT: [[TMP21:%.*]] = phi i64 [ -1, [[TMP12]] ], [ [[TMP20]], [[TMP17]] ] +; CHECK-NEXT: [[TMP22:%.*]] = inttoptr i64 0 to ptr +; CHECK-NEXT: [[TMP23:%.*]] = sub nsw i64 [[TMP21]], [[TMP13]] +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[TMP3]]) +; CHECK-NEXT: [[TMP24:%.*]] = icmp ult i64 [[TMP23]], 2 +; CHECK-NEXT: br i1 [[TMP24]], label [[G_EXIT:%.*]], label [[TMP25:%.*]] +; CHECK: 25: +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq ptr [[TMP26]], null +; CHECK-NEXT: br i1 [[TMP27]], label [[TMP28:%.*]], label [[TMP29:%.*]] +; CHECK: 28: +; CHECK-NEXT: unreachable +; CHECK: 29: +; CHECK-NEXT: call void @D(ptr nonnull sret([[CLASS_B]]) [[TMP3]], ptr nonnull dereferenceable(32) [[PTR2:%.*]]) +; CHECK-NEXT: br label [[G_EXIT]] +; CHECK: G.exit: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[TMP3]]) +; CHECK-NEXT: br label [[FOO]] +; CHECK: foo: +; CHECK-NEXT: ret void +; %3 = alloca %class.b, align 8 %.sroa.0 = alloca i64, align 8 store i64 0, ptr %.sroa.0, align 8 @@ -86,4 +138,5 @@ ; Function Attrs: argmemonly nounwind declare void @llvm.lifetime.end.p0(i64, ptr nocapture) ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/fca.ll b/llvm/test/Transforms/SROA/fca.ll --- a/llvm/test/Transforms/SROA/fca.ll +++ b/llvm/test/Transforms/SROA/fca.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" define { i32, i32 } @test0(i32 %x, i32 %y, { i32, i32 } %v) { @@ -53,3 +55,6 @@ store volatile { i32, i32 } %result, ptr %b ret { i32, i32 } %result } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/ignore-droppable.ll b/llvm/test/Transforms/SROA/ignore-droppable.ll --- a/llvm/test/Transforms/SROA/ignore-droppable.ll +++ b/llvm/test/Transforms/SROA/ignore-droppable.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg declare void @llvm.assume(i1) declare void @llvm.lifetime.start.p0(i64 %size, ptr nocapture %ptr) @@ -78,3 +79,6 @@ call void @llvm.assume(i1 true) ["nonnull"(ptr %A), "align"(ptr %A, i64 2), "nonnull"(ptr %A)] ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll --- a/llvm/test/Transforms/SROA/invariant-group.ll +++ b/llvm/test/Transforms/SROA/invariant-group.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=sroa -S -o - < %s | FileCheck %s +; RUN: opt -passes='sroa<preserve-cfg>' -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -passes='sroa<modify-cfg>' -S -o - < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %t = type { i32, i32 } @@ -79,3 +80,6 @@ } !0 = !{} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/irregular-type.ll b/llvm/test/Transforms/SROA/irregular-type.ll --- a/llvm/test/Transforms/SROA/irregular-type.ll +++ b/llvm/test/Transforms/SROA/irregular-type.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %S = type { [4 x i8] } @@ -39,3 +40,6 @@ %3 = zext i17 %2 to i32 ret i32 %3 } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/lifetime-intrinsic.ll b/llvm/test/Transforms/SROA/lifetime-intrinsic.ll --- a/llvm/test/Transforms/SROA/lifetime-intrinsic.ll +++ b/llvm/test/Transforms/SROA/lifetime-intrinsic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %i32x2 = type { [2 x i32] } @@ -55,3 +56,6 @@ attributes #0 = { alwaysinline nounwind } attributes #1 = { argmemonly nounwind } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll b/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll --- a/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll +++ b/llvm/test/Transforms/SROA/mem-par-metadata-sroa-cast.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; ; Make sure the llvm.access.group meta-data is preserved ; when a load/store is replaced with another load/store by sroa @@ -35,3 +36,6 @@ } !0 = distinct !{} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll b/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll --- a/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll +++ b/llvm/test/Transforms/SROA/mem-par-metadata-sroa.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; ; Make sure the llvm.access.group meta-data is preserved ; when a load/store is replaced with another load/store by sroa @@ -121,3 +122,6 @@ !4 = distinct !{!4, !5, !"_ZNK7ComplexplERKS_: %agg.result"} !5 = distinct !{!5, !"_ZNK7ComplexplERKS_"} !11 = distinct !{} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll --- a/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll +++ b/llvm/test/Transforms/SROA/non-capturing-call-readonly.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s --check-prefix=CHECK +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg define i32 @alloca_used_in_call(ptr %data, i64 %n) { ; CHECK-LABEL: @alloca_used_in_call( @@ -917,3 +918,6 @@ declare dso_local i32 @__gxx_personality_v0(...) declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/non-integral-pointers.ll b/llvm/test/Transforms/SROA/non-integral-pointers.ll --- a/llvm/test/Transforms/SROA/non-integral-pointers.ll +++ b/llvm/test/Transforms/SROA/non-integral-pointers.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=sroa -S < %s | FileCheck %s +; RUN: opt -passes='sroa<preserve-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -passes='sroa<modify-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; This test checks that SROA does not introduce ptrtoint and inttoptr ; casts from and to non-integral pointers. The "ni:4" bit in the @@ -131,3 +132,6 @@ } declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll --- a/llvm/test/Transforms/SROA/phi-and-select.ll +++ b/llvm/test/Transforms/SROA/phi-and-select.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" define i32 @test1() { @@ -810,13 +811,23 @@ ; Don't speculate a load based on an earlier volatile operation. 
define i8 @volatile_select(ptr %p, i1 %b) { -; CHECK-LABEL: @volatile_select( -; CHECK-NEXT: [[P2:%.*]] = alloca i8, align 1 -; CHECK-NEXT: store i8 0, ptr [[P2]], align 1 -; CHECK-NEXT: store volatile i8 0, ptr [[P:%.*]], align 1 -; CHECK-NEXT: [[PX:%.*]] = select i1 [[B:%.*]], ptr [[P]], ptr [[P2]] -; CHECK-NEXT: [[V2:%.*]] = load i8, ptr [[PX]], align 1 -; CHECK-NEXT: ret i8 [[V2]] +; CHECK-preserve-cfg-LABEL: @volatile_select( +; CHECK-preserve-cfg-NEXT: [[P2:%.*]] = alloca i8, align 1 +; CHECK-preserve-cfg-NEXT: store i8 0, ptr [[P2]], align 1 +; CHECK-preserve-cfg-NEXT: store volatile i8 0, ptr [[P:%.*]], align 1 +; CHECK-preserve-cfg-NEXT: [[PX:%.*]] = select i1 [[B:%.*]], ptr [[P]], ptr [[P2]] +; CHECK-preserve-cfg-NEXT: [[V2:%.*]] = load i8, ptr [[PX]], align 1 +; CHECK-preserve-cfg-NEXT: ret i8 [[V2]] +; +; CHECK-modify-cfg-LABEL: @volatile_select( +; CHECK-modify-cfg-NEXT: store volatile i8 0, ptr [[P:%.*]], align 1 +; CHECK-modify-cfg-NEXT: br i1 [[B:%.*]], label [[DOTTHEN:%.*]], label [[DOTCONT:%.*]] +; CHECK-modify-cfg: .then: +; CHECK-modify-cfg-NEXT: [[V2_THEN_VAL:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-modify-cfg-NEXT: br label [[DOTCONT]] +; CHECK-modify-cfg: .cont: +; CHECK-modify-cfg-NEXT: [[V2:%.*]] = phi i8 [ [[V2_THEN_VAL]], [[DOTTHEN]] ], [ 0, [[TMP0:%.*]] ] +; CHECK-modify-cfg-NEXT: ret i8 [[V2]] ; %p2 = alloca i8 store i8 0, ptr %p2 diff --git a/llvm/test/Transforms/SROA/phi-catchswitch.ll b/llvm/test/Transforms/SROA/phi-catchswitch.ll --- a/llvm/test/Transforms/SROA/phi-catchswitch.ll +++ b/llvm/test/Transforms/SROA/phi-catchswitch.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = 
"wasm32-unknown-unknown" @@ -63,3 +64,6 @@ store i32 0, ptr %tmp11, align 4 unreachable } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll --- a/llvm/test/Transforms/SROA/phi-gep.ll +++ b/llvm/test/Transforms/SROA/phi-gep.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=sroa < %s | FileCheck %s +; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %pair = type { i32, i32 } @@ -508,3 +509,6 @@ declare i32 @__gxx_personality_v0(...) declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll b/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll --- a/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll +++ b/llvm/test/Transforms/SROA/phi-speculate-different-load-types.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=sroa < %s -S | FileCheck %s +; RUN: opt -passes='sroa<preserve-cfg>' < %s -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -passes='sroa<modify-cfg>' < %s -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg define void @f(i1 %i) { ; CHECK-LABEL: @f( @@ -39,3 +40,6 @@ declare void @use32(i32) declare void @use64(i64) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll b/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll --- a/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll +++ b/llvm/test/Transforms/SROA/phi-with-duplicate-pred.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @a = external global i16, align 1 @@ -263,3 +264,6 @@ cleanup7: ; preds = %cleanup ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll b/llvm/test/Transforms/SROA/pointer-offset-size.ll --- a/llvm/test/Transforms/SROA/pointer-offset-size.ll +++ b/llvm/test/Transforms/SROA/pointer-offset-size.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64:32" %struct.test = type { %struct.basic, %struct.basic } @@ -21,3 +22,6 @@ } declare void @llvm.memcpy.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i1) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll --- a/llvm/test/Transforms/SROA/ppcf128-no-fold.ll +++ b/llvm/test/Transforms/SROA/ppcf128-no-fold.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -32,3 +33,6 @@ attributes #0 = { nounwind } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/pr26972.ll b/llvm/test/Transforms/SROA/pr26972.ll --- a/llvm/test/Transforms/SROA/pr26972.ll +++ b/llvm/test/Transforms/SROA/pr26972.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux" @@ -16,3 +17,6 @@ } declare void @llvm.lifetime.end.p0(i64, ptr nocapture) +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/pr37267.ll b/llvm/test/Transforms/SROA/pr37267.ll --- a/llvm/test/Transforms/SROA/pr37267.ll +++ b/llvm/test/Transforms/SROA/pr37267.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32:64-S128" target triple = "sparcv9-sun-solaris" @@ -78,3 +79,6 @@ %rc = add i16 %_tmp13, %_tmp16 ret i16 %rc } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/preserve-nonnull.ll b/llvm/test/Transforms/SROA/preserve-nonnull.ll --- a/llvm/test/Transforms/SROA/preserve-nonnull.ll +++ b/llvm/test/Transforms/SROA/preserve-nonnull.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; ; Make sure that SROA doesn't lose nonnull metadata ; on loads from allocas that get optimized out. @@ -81,3 +82,6 @@ } !0 = !{} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/scalable-vectors.ll b/llvm/test/Transforms/SROA/scalable-vectors.ll --- a/llvm/test/Transforms/SROA/scalable-vectors.ll +++ b/llvm/test/Transforms/SROA/scalable-vectors.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; This test checks that SROA runs mem2reg on scalable vectors. @@ -79,3 +81,6 @@ } declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll --- a/llvm/test/Transforms/SROA/select-gep.ll +++ b/llvm/test/Transforms/SROA/select-gep.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=sroa < %s | FileCheck %s +; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %pair = type { i32, i32 } @@ -77,12 +78,22 @@ } define i32 @test_sroa_select_gep_poison(i1 %cond) { -; CHECK-LABEL: @test_sroa_select_gep_poison( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], ptr [[A_SROA_0]], ptr poison -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT_SROA_SEL]], align 4 -; CHECK-NEXT: ret i32 [[LOAD]] +; CHECK-preserve-cfg-LABEL: @test_sroa_select_gep_poison( +; CHECK-preserve-cfg-NEXT: bb: +; CHECK-preserve-cfg-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[COND:%.*]], ptr [[A_SROA_0]], ptr poison +; CHECK-preserve-cfg-NEXT: [[LOAD:%.*]] = load i32, ptr [[SELECT_SROA_SEL]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[LOAD]] +; +; CHECK-modify-cfg-LABEL: @test_sroa_select_gep_poison( +; CHECK-modify-cfg-NEXT: bb: +; CHECK-modify-cfg-NEXT: br i1 [[COND:%.*]], label [[BB_CONT:%.*]], label [[BB_ELSE:%.*]] +; CHECK-modify-cfg: bb.else: +; CHECK-modify-cfg-NEXT: [[LOAD_ELSE_VAL:%.*]] = load i32, ptr poison, align 4 +; CHECK-modify-cfg-NEXT: br label [[BB_CONT]] +; CHECK-modify-cfg: bb.cont: +; CHECK-modify-cfg-NEXT: [[LOAD:%.*]] = phi i32 [ undef, [[BB:%.*]] ], [ [[LOAD_ELSE_VAL]], [[BB_ELSE]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[LOAD]] ; bb: %a = alloca %pair, align 4 diff --git 
a/llvm/test/Transforms/SROA/select-load.ll b/llvm/test/Transforms/SROA/select-load.ll --- a/llvm/test/Transforms/SROA/select-load.ll +++ b/llvm/test/Transforms/SROA/select-load.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=sroa < %s | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %st.half = type { half } @@ -82,22 +83,41 @@ ; We should recursively evaluate select's. define i32 @clamp_load_to_constant_range(ptr %data, i64 %indvars.iv) { -; CHECK-LABEL: @clamp_load_to_constant_range( -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[MAX:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV:%.*]] -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[MIN]]) -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[MAX]]) -; CHECK-NEXT: store i32 4095, ptr [[MAX]], align 4 -; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[I1]], 0 -; CHECK-NEXT: [[I2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[I1]], i32 0) -; CHECK-NEXT: [[__B___A_I_I:%.*]] = select i1 [[CMP_I_I]], ptr [[MIN]], ptr [[ARRAYIDX]] -; CHECK-NEXT: [[CMP_I1_I:%.*]] = icmp ugt i32 [[I2]], 4095 -; CHECK-NEXT: [[__B___A_I2_I:%.*]] = select i1 [[CMP_I1_I]], ptr [[MAX]], ptr [[__B___A_I_I]] -; CHECK-NEXT: [[I3:%.*]] = load i32, ptr [[__B___A_I2_I]], align 4 -; CHECK-NEXT: ret i32 [[I3]] +; CHECK-preserve-cfg-LABEL: @clamp_load_to_constant_range( +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: [[MAX:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV:%.*]] +; 
CHECK-preserve-cfg-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[MIN]]) +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[MAX]]) +; CHECK-preserve-cfg-NEXT: store i32 4095, ptr [[MAX]], align 4 +; CHECK-preserve-cfg-NEXT: [[I1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-preserve-cfg-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[I1]], 0 +; CHECK-preserve-cfg-NEXT: [[I2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[I1]], i32 0) +; CHECK-preserve-cfg-NEXT: [[__B___A_I_I:%.*]] = select i1 [[CMP_I_I]], ptr [[MIN]], ptr [[ARRAYIDX]] +; CHECK-preserve-cfg-NEXT: [[CMP_I1_I:%.*]] = icmp ugt i32 [[I2]], 4095 +; CHECK-preserve-cfg-NEXT: [[__B___A_I2_I:%.*]] = select i1 [[CMP_I1_I]], ptr [[MAX]], ptr [[__B___A_I_I]] +; CHECK-preserve-cfg-NEXT: [[I3:%.*]] = load i32, ptr [[__B___A_I2_I]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[I3]] +; +; CHECK-modify-cfg-LABEL: @clamp_load_to_constant_range( +; CHECK-modify-cfg-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV:%.*]] +; CHECK-modify-cfg-NEXT: [[I1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-modify-cfg-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[I1]], 0 +; CHECK-modify-cfg-NEXT: [[I2:%.*]] = tail call i32 @llvm.smax.i32(i32 [[I1]], i32 0) +; CHECK-modify-cfg-NEXT: [[CMP_I1_I:%.*]] = icmp ugt i32 [[I2]], 4095 +; CHECK-modify-cfg-NEXT: br i1 [[CMP_I1_I]], label [[DOTCONT:%.*]], label [[DOTELSE:%.*]] +; CHECK-modify-cfg: .else: +; CHECK-modify-cfg-NEXT: br i1 [[CMP_I_I]], label [[DOTELSE_CONT:%.*]], label [[DOTELSE_ELSE:%.*]] +; CHECK-modify-cfg: .else.else: +; CHECK-modify-cfg-NEXT: [[I3_ELSE_VAL_ELSE_VAL:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-modify-cfg-NEXT: br label [[DOTELSE_CONT]] +; CHECK-modify-cfg: .else.cont: +; CHECK-modify-cfg-NEXT: [[I3_ELSE_VAL:%.*]] = phi i32 [ 0, [[DOTELSE]] ], [ [[I3_ELSE_VAL_ELSE_VAL]], [[DOTELSE_ELSE]] ] +; CHECK-modify-cfg-NEXT: br label 
[[DOTCONT]] +; CHECK-modify-cfg: .cont: +; CHECK-modify-cfg-NEXT: [[I3:%.*]] = phi i32 [ 4095, [[TMP0:%.*]] ], [ [[I3_ELSE_VAL]], [[DOTELSE_CONT]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[I3]] ; %min = alloca i32, align 4 %max = alloca i32, align 4 @@ -117,13 +137,23 @@ } define i32 @non_speculatable_load_of_select(i1 %cond, ptr %else.addr) { -; CHECK-LABEL: @non_speculatable_load_of_select( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF0:![0-9]+]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_load_of_select( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_load_of_select( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: br i1 [[COND:%.*]], label [[ENTRY_CONT:%.*]], label [[ENTRY_ELSE:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-modify-cfg: entry.else: +; CHECK-modify-cfg-NEXT: [[R_ELSE_VAL:%.*]] = load i32, ptr [[ELSE_ADDR:%.*]], align 4 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[R_ELSE_VAL]], [[ENTRY_ELSE]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %min = alloca i32, align 4 @@ -133,13 +163,23 @@ ret i32 %r } define i32 @non_speculatable_load_of_select_inverted(i1 %cond, ptr %then.addr) { -; CHECK-LABEL: @non_speculatable_load_of_select_inverted( -; CHECK-NEXT: entry: -; CHECK-NEXT: 
[[MAX:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 4095, ptr [[MAX]], align 4 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_load_of_select_inverted( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MAX:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 4095, ptr [[MAX]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_load_of_select_inverted( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: br i1 [[COND:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_CONT:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK-modify-cfg: entry.then: +; CHECK-modify-cfg-NEXT: [[R_THEN_VAL:%.*]] = load i32, ptr [[THEN_ADDR:%.*]], align 4 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[R:%.*]] = phi i32 [ [[R_THEN_VAL]], [[ENTRY_THEN]] ], [ 4095, [[ENTRY:%.*]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %max = alloca i32, align 4 @@ -150,13 +190,21 @@ } define i32 @non_speculatable_volatile_load_of_select(i1 %cond, ptr %else.addr) { -; CHECK-LABEL: @non_speculatable_volatile_load_of_select( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load volatile i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_volatile_load_of_select( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = 
alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load volatile i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_volatile_load_of_select( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-modify-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-modify-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: [[R:%.*]] = load volatile i32, ptr [[ADDR]], align 4 +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %min = alloca i32, align 4 @@ -166,13 +214,21 @@ ret i32 %r } define i32 @non_speculatable_volatile_load_of_select_inverted(i1 %cond, ptr %then.addr) { -; CHECK-LABEL: @non_speculatable_volatile_load_of_select_inverted( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MAX:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 4095, ptr [[MAX]], align 4 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load volatile i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_volatile_load_of_select_inverted( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MAX:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 4095, ptr [[MAX]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load volatile i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_volatile_load_of_select_inverted( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[MAX:%.*]] = alloca i32, align 
4 +; CHECK-modify-cfg-NEXT: store i32 4095, ptr [[MAX]], align 4 +; CHECK-modify-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: [[R:%.*]] = load volatile i32, ptr [[ADDR]], align 4 +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %max = alloca i32, align 4 @@ -183,13 +239,25 @@ } define i32 @non_speculatable_atomic_unord_load_of_select(i1 %cond, ptr %else.addr) { -; CHECK-LABEL: @non_speculatable_atomic_unord_load_of_select( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[ADDR]] unordered, align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_atomic_unord_load_of_select( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[MIN]], ptr [[ELSE_ADDR:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load atomic i32, ptr [[ADDR]] unordered, align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_atomic_unord_load_of_select( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: br i1 [[COND:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg: entry.then: +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT:%.*]] +; CHECK-modify-cfg: entry.else: +; CHECK-modify-cfg-NEXT: [[R_ELSE_VAL:%.*]] = load atomic i32, ptr [[ELSE_ADDR:%.*]] unordered, align 4 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[R:%.*]] = phi i32 [ 0, [[ENTRY_THEN]] ], [ [[R_ELSE_VAL]], [[ENTRY_ELSE]] ] +; CHECK-modify-cfg-NEXT: ret i32 
[[R]] ; entry: %min = alloca i32, align 4 @@ -199,13 +267,25 @@ ret i32 %r } define i32 @non_speculatable_atomic_unord_load_of_select_inverted(i1 %cond, ptr %then.addr) { -; CHECK-LABEL: @non_speculatable_atomic_unord_load_of_select_inverted( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MAX:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 4095, ptr [[MAX]], align 4 -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load atomic i32, ptr [[ADDR]] unordered, align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_atomic_unord_load_of_select_inverted( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MAX:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 4095, ptr [[MAX]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND:%.*]], ptr [[THEN_ADDR:%.*]], ptr [[MAX]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load atomic i32, ptr [[ADDR]] unordered, align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_atomic_unord_load_of_select_inverted( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: br i1 [[COND:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg: entry.then: +; CHECK-modify-cfg-NEXT: [[R_THEN_VAL:%.*]] = load atomic i32, ptr [[THEN_ADDR:%.*]] unordered, align 4 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT:%.*]] +; CHECK-modify-cfg: entry.else: +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[R:%.*]] = phi i32 [ [[R_THEN_VAL]], [[ENTRY_THEN]] ], [ 4095, [[ENTRY_ELSE]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %max = alloca i32, align 4 @@ -216,14 +296,25 @@ } define i32 @non_speculatable_load_of_select_outer(i1 %cond_inner, i1 %cond_outer, ptr %data_then, ptr %data_else) { -; CHECK-LABEL: @non_speculatable_load_of_select_outer( -; 
CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[DATA_THEN:%.*]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN]], ptr [[ADDR_DATA]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_load_of_select_outer( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[DATA_THEN:%.*]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN]], ptr [[ADDR_DATA]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_load_of_select_outer( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[DATA_THEN:%.*]], ptr [[DATA_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: br i1 [[COND_OUTER:%.*]], label [[ENTRY_CONT:%.*]], label [[ENTRY_ELSE:%.*]], !prof [[PROF0]] +; CHECK-modify-cfg: entry.else: +; CHECK-modify-cfg-NEXT: [[R_ELSE_VAL:%.*]] = load i32, ptr [[ADDR_DATA]], align 4 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[R_ELSE_VAL]], [[ENTRY_ELSE]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %min = alloca i32, align 4 @@ -234,14 +325,25 @@ ret i32 %r } define i32 @non_speculatable_load_of_select_outer_inverted(i1 %cond_inner, i1 %cond_outer, ptr %data_then, ptr %data_else) { -; CHECK-LABEL: 
@non_speculatable_load_of_select_outer_inverted( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[DATA_THEN:%.*]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[ADDR_DATA]], ptr [[MIN]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_load_of_select_outer_inverted( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[DATA_THEN:%.*]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[ADDR_DATA]], ptr [[MIN]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_load_of_select_outer_inverted( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[DATA_THEN:%.*]], ptr [[DATA_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: br i1 [[COND_OUTER:%.*]], label [[ENTRY_THEN:%.*]], label [[ENTRY_CONT:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg: entry.then: +; CHECK-modify-cfg-NEXT: [[R_THEN_VAL:%.*]] = load i32, ptr [[ADDR_DATA]], align 4 +; CHECK-modify-cfg-NEXT: br label [[ENTRY_CONT]] +; CHECK-modify-cfg: entry.cont: +; CHECK-modify-cfg-NEXT: [[R:%.*]] = phi i32 [ [[R_THEN_VAL]], [[ENTRY_THEN]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %min = alloca i32, align 4 @@ -253,14 +355,23 @@ } define i32 @non_speculatable_load_of_select_inner(i1 %cond_inner, i1 %cond_outer, ptr %data_else, ptr %min_else) { -; 
CHECK-LABEL: @non_speculatable_load_of_select_inner( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[MIN_ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[MIN]], ptr [[MIN_ELSE:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN_ADDR_DATA]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_load_of_select_inner( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[MIN_ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[MIN]], ptr [[MIN_ELSE:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN_ADDR_DATA]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_load_of_select_inner( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-modify-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-modify-cfg-NEXT: [[MIN_ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[MIN]], ptr [[MIN_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN_ADDR_DATA]], ptr [[DATA_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %min = alloca i32, align 4 @@ -271,14 +382,23 @@ ret i32 %r } define i32 @non_speculatable_load_of_select_inner_inverted(i1 %cond_inner, i1 %cond_outer, ptr %data_else, ptr %min_then) { -; CHECK-LABEL: @non_speculatable_load_of_select_inner_inverted( -; CHECK-NEXT: entry: -; 
CHECK-NEXT: [[MIN:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, ptr [[MIN]], align 4 -; CHECK-NEXT: [[MIN_ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[MIN_THEN:%.*]], ptr [[MIN]], !prof [[PROF0]] -; CHECK-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN_ADDR_DATA]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] -; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 -; CHECK-NEXT: ret i32 [[R]] +; CHECK-preserve-cfg-LABEL: @non_speculatable_load_of_select_inner_inverted( +; CHECK-preserve-cfg-NEXT: entry: +; CHECK-preserve-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-preserve-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-preserve-cfg-NEXT: [[MIN_ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[MIN_THEN:%.*]], ptr [[MIN]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN_ADDR_DATA]], ptr [[DATA_ELSE:%.*]], !prof [[PROF0]] +; CHECK-preserve-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-preserve-cfg-NEXT: ret i32 [[R]] +; +; CHECK-modify-cfg-LABEL: @non_speculatable_load_of_select_inner_inverted( +; CHECK-modify-cfg-NEXT: entry: +; CHECK-modify-cfg-NEXT: [[MIN:%.*]] = alloca i32, align 4 +; CHECK-modify-cfg-NEXT: store i32 0, ptr [[MIN]], align 4 +; CHECK-modify-cfg-NEXT: [[MIN_ADDR_DATA:%.*]] = select i1 [[COND_INNER:%.*]], ptr [[MIN_THEN:%.*]], ptr [[MIN]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: [[ADDR:%.*]] = select i1 [[COND_OUTER:%.*]], ptr [[MIN_ADDR_DATA]], ptr [[DATA_ELSE:%.*]], !prof [[PROF1]] +; CHECK-modify-cfg-NEXT: [[R:%.*]] = load i32, ptr [[ADDR]], align 4 +; CHECK-modify-cfg-NEXT: ret i32 [[R]] ; entry: %min = alloca i32, align 4 diff --git a/llvm/test/Transforms/SROA/slice-order-independence.ll b/llvm/test/Transforms/SROA/slice-order-independence.ll --- a/llvm/test/Transforms/SROA/slice-order-independence.ll +++ b/llvm/test/Transforms/SROA/slice-order-independence.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind @@ -44,3 +45,6 @@ %b0 = load i63, ptr %b ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll --- a/llvm/test/Transforms/SROA/slice-width.ll +++ b/llvm/test/Transforms/SROA/slice-width.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind @@ -157,3 +158,6 @@ %L2 = load i1, ptr %A ret i1 %L2 } +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll --- a/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll +++ b/llvm/test/Transforms/SROA/sroa-common-type-fail-promotion.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=sroa -S < %s | FileCheck %s +; RUN: opt -passes='sroa<preserve-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -passes='sroa<modify-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg %"struct.a" = type { <8 x half> } %"struct.b" = type { %"struct.a" } @@ -409,3 +410,6 @@ declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) nounwind attributes #0 = { nounwind readonly } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/std-clamp.ll b/llvm/test/Transforms/SROA/std-clamp.ll --- a/llvm/test/Transforms/SROA/std-clamp.ll +++ b/llvm/test/Transforms/SROA/std-clamp.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" @@ -29,3 +30,6 @@ %i9 = load float, ptr %i8, align 4 ret float %i9 } +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll --- a/llvm/test/Transforms/SROA/tbaa-struct.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=sroa %s | FileCheck %s +; RUN: opt -S -passes='sroa<preserve-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -S -passes='sroa<modify-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; SROA should keep `!tbaa.struct` metadata @@ -26,3 +27,6 @@ !8 = !{!"float", !4, i64 0} !10 = !{i64 0, i64 4, !11, i64 4, i64 4, !11} !11 = !{!8, !8, i64 0} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=sroa %s | FileCheck %s +; RUN: opt -S -passes='sroa<preserve-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -S -passes='sroa<modify-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; SROA should correctly offset `!tbaa.struct` metadata @@ -34,3 +35,6 @@ !6 = !{!"Simple C++ TBAA"} !7 = !{!8, !8, i64 0} !8 = !{!"int", !5, i64 0} +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/tbaa-subload.ll b/llvm/test/Transforms/SROA/tbaa-subload.ll --- a/llvm/test/Transforms/SROA/tbaa-subload.ll +++ b/llvm/test/Transforms/SROA/tbaa-subload.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=sroa %s | FileCheck %s +; RUN: opt -S -passes='sroa<preserve-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -S -passes='sroa<modify-cfg>' %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; This should not crash @@ -34,3 +35,6 @@ !7 = !{!8, !3, i64 8} !8 = !{!"_ZTSZN2ax2baEMS_FvvE2an2arE3$_0", !9, i64 0, !3, i64 8} !9 = !{!"_ZTS2ar"} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/vector-conversion.ll b/llvm/test/Transforms/SROA/vector-conversion.ll --- a/llvm/test/Transforms/SROA/vector-conversion.ll +++ b/llvm/test/Transforms/SROA/vector-conversion.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" define <4 x i64> @vector_ptrtoint({<2 x ptr>, <2 x ptr>} %x) { @@ -98,3 +99,6 @@ ret <16 x i8> %vec } +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll b/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll --- a/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll +++ b/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=sroa -S < %s | FileCheck %s +; RUN: opt -passes='sroa<preserve-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -passes='sroa<modify-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64" @@ -30,3 +31,6 @@ declare void @wombat3(<3 x float>) #0 attributes #0 = { nounwind } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/vector-promotion-different-size.ll b/llvm/test/Transforms/SROA/vector-promotion-different-size.ll --- a/llvm/test/Transforms/SROA/vector-promotion-different-size.ll +++ b/llvm/test/Transforms/SROA/vector-promotion-different-size.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" define <4 x i1> @vector_bitcast() { @@ -27,3 +28,6 @@ %load = load <64 x i16>, ptr %p ret <64 x i16> %load } +;; NOTE: These prefixes are unused and the list is autogenerated.
Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa -S | FileCheck %s +; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" %S1 = type { i64, [42 x float] } @@ -631,3 +632,6 @@ declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) declare void @llvm.lifetime.end.p0(i64, ptr) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SROA/vectors-of-pointers.ll b/llvm/test/Transforms/SROA/vectors-of-pointers.ll --- a/llvm/test/Transforms/SROA/vectors-of-pointers.ll +++ b/llvm/test/Transforms/SROA/vectors-of-pointers.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=sroa +; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-preserve-cfg +; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-modify-cfg ; Make sure we don't crash on this one.
@@ -7,6 +8,18 @@ target triple = "x86_64-apple-macosx10.8.0" define void @foo(i1 %c1, i1 %c2) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB0_EXIT158:%.*]], label [[IF_THEN_I_I_I_I_I138:%.*]] +; CHECK: if.then.i.i.i.i.i138: +; CHECK-NEXT: ret void +; CHECK: bb0.exit158: +; CHECK-NEXT: br i1 [[C2:%.*]], label [[BB0_EXIT257:%.*]], label [[IF_THEN_I_I_I_I_I237:%.*]] +; CHECK: if.then.i.i.i.i.i237: +; CHECK-NEXT: unreachable +; CHECK: bb0.exit257: +; CHECK-NEXT: ret void +; entry: %Args.i = alloca <2 x ptr>, align 16 br i1 %c1, label %bb0.exit158, label %if.then.i.i.i.i.i138 @@ -24,3 +37,6 @@ %0 = load <2 x ptr>, ptr %Args.i, align 16 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-modify-cfg: {{.*}} +; CHECK-preserve-cfg: {{.*}} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll --- a/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/2011-09-26-EHCrash.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='sroa,simple-loop-unswitch' -verify-memoryssa -disable-output +; RUN: opt < %s -passes='sroa,simple-loop-unswitch' -verify-memoryssa -disable-output ; PR11016 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-macosx10.7.2" diff --git a/llvm/test/Transforms/Util/dbg-user-of-aext.ll b/llvm/test/Transforms/Util/dbg-user-of-aext.ll --- a/llvm/test/Transforms/Util/dbg-user-of-aext.ll +++ b/llvm/test/Transforms/Util/dbg-user-of-aext.ll @@ -1,7 +1,7 @@ ; Checks that llvm.dbg.declare -> llvm.dbg.value conversion utility ; (here exposed through the SROA) pass refers to [s|z]exts of values (as ; opposed to the operand of a [s|z]ext). 
-; RUN: opt -S -passes='sroa' %s | FileCheck %s +; RUN: opt -S -passes='sroa' %s | FileCheck %s ; Built from: ; struct foo { bool b; long i; }; diff --git a/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp b/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp --- a/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp +++ b/llvm/tools/llvm-reduce/deltas/RunIRPasses.cpp @@ -14,12 +14,12 @@ using namespace llvm; -static cl::opt<std::string> PassPipeline( - "ir-passes", - cl::desc("A textual description of the pass pipeline, same as " - "what's passed to `opt -passes`."), - cl::init( - "function(sroa,instcombine,gvn,simplifycfg,infer-address-spaces)")); +static cl::opt<std::string> + PassPipeline("ir-passes", + cl::desc("A textual description of the pass pipeline, same as " + "what's passed to `opt -passes`."), + cl::init("function(sroa<modify-cfg>,instcombine,gvn," + "simplifycfg,infer-address-spaces)")); static void runPasses(Oracle &O, Module &Program) { LoopAnalysisManager LAM;