diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -326,6 +326,7 @@
 void initializeModuleMemProfilerLegacyPassPass(PassRegistry &);
 void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
 void initializeModuloScheduleTestPass(PassRegistry&);
+void initializeMSSAArgPromotionPass(PassRegistry &);
 void initializeMustExecutePrinterPass(PassRegistry&);
 void initializeMustBeExecutedContextPrinterPass(PassRegistry&);
 void initializeNameAnonGlobalLegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -158,6 +158,11 @@
 ///
 Pass *createArgumentPromotionPass(unsigned maxElements = 3);

+//===----------------------------------------------------------------------===//
+/// createMSSAArgPromotionPass - This pass promotes "by reference" arguments to
+/// be passed by value. Input and/or output arguments are supported.
+Pass *createMSSAArgPromotionPass();
+
 //===----------------------------------------------------------------------===//
 /// createOpenMPOptLegacyPass - OpenMP specific optimizations.
 Pass *createOpenMPOptCGSCCLegacyPass();
diff --git a/llvm/include/llvm/Transforms/IPO/MSSAArgPromotion.h b/llvm/include/llvm/Transforms/IPO/MSSAArgPromotion.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/MSSAArgPromotion.h
@@ -0,0 +1,33 @@
+//===- MSSAArgPromotion.h - Promote by-reference arguments ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// MemorySSA-based promotion of by-reference arguments; see
+// MSSAArgPromotion.cpp for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_MSSAARGPROMOTION_H
+#define LLVM_TRANSFORMS_IPO_MSSAARGPROMOTION_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class MSSAArgPromotionPass : public PassInfoMixin<MSSAArgPromotionPass> {
+public:
+  MSSAArgPromotionPass() {}
+
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_MSSAARGPROMOTION_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -111,6 +111,7 @@
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LoopExtractor.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MSSAArgPromotion.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
 #include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/IPO/OpenMPOpt.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -161,6 +161,7 @@
 #define CGSCC_PASS(NAME, CREATE_PASS)
 #endif
 CGSCC_PASS("argpromotion", ArgumentPromotionPass())
+CGSCC_PASS("mssaargpromotion", MSSAArgPromotionPass())
 CGSCC_PASS("invalidate", InvalidateAllAnalysesPass())
 CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
 CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
---
a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
 #include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/IPO/MSSAArgPromotion.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
@@ -574,10 +575,14 @@
     PM.add(llvm::createAMDGPUSimplifyLibCallsPass(this));
   });

+  bool EnableAggressiveOpt = getOptLevel() >= CodeGenOpt::Aggressive;
   Builder.addExtension(
       PassManagerBuilder::EP_CGSCCOptimizerLate,
-      [EnableOpt, PromoteKernelArguments](const PassManagerBuilder &,
-                                          legacy::PassManagerBase &PM) {
+      [=](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+        // Add a pass to promote arguments passed by reference.
+        if (EnableAggressiveOpt)
+          PM.add(createMSSAArgPromotionPass());
+
         // Add promote kernel arguments pass to the opt pipeline right before
         // infer address spaces which is needed to do actual address space
         // rewriting.
@@ -737,6 +742,10 @@
           FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
         }

+        // Add a pass to promote arguments passed by reference.
+        if (Level.getSpeedupLevel() >= OptimizationLevel::O3.getSpeedupLevel())
+          PM.addPass(MSSAArgPromotionPass());
+
         PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
       });
 }
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -30,6 +30,7 @@
   LowerTypeTests.cpp
   MergeFunctions.cpp
   ModuleInliner.cpp
+  MSSAArgPromotion.cpp
   OpenMPOpt.cpp
   PartialInlining.cpp
   PassManagerBuilder.cpp
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -47,6 +47,7 @@
   initializeSingleLoopExtractorPass(Registry);
   initializeLowerTypeTestsPass(Registry);
   initializeMergeFunctionsLegacyPassPass(Registry);
+  initializeMSSAArgPromotionPass(Registry);
   initializePartialInlinerLegacyPassPass(Registry);
   initializeAttributorLegacyPassPass(Registry);
   initializeAttributorCGSCCLegacyPassPass(Registry);
diff --git a/llvm/lib/Transforms/IPO/MSSAArgPromotion.cpp b/llvm/lib/Transforms/IPO/MSSAArgPromotion.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/MSSAArgPromotion.cpp
@@ -0,0 +1,1420 @@
+//===------ MSSAArgPromotion.cpp - Promote by-reference arguments ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass promotes function arguments passed by reference:
+// 1. Input argument: if the argument is only read, it is promoted to an
+//    argument passed by value. Callers load the argument's value and pass it
+//    to the function.
+// 2. Output argument: if the argument is modified, the function return type is
+//    transformed into an aggregate and the final argument's value is returned
+//    as a component of the return value. Callers store the returned value
+//    using the original argument pointer.
+// 3. Input/output argument: the combination of the above.
+//
+// int foo(int a, int *x) {
+//   *x += 2;
+//   return a;
+// }
+// int MemVar;
+// int X = foo(1, &MemVar);
+//
+// into:
+//
+// struct { int, int } foo(int a, int x) {
+//   return { a, x + 2 };
+// }
+// int MemVar;
+// struct { int, int } S = foo(1, MemVar);
+// int X = S.first;
+// MemVar = S.second;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/MSSAArgPromotion.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <iterator>
+#include <memory>
+#include <queue>
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mssaargpromotion"
+
+STATISTIC(NumInArgCandidates, "Number of input argument candidates found");
+STATISTIC(NumInArgPromoted, "Number of input arguments promoted");
+STATISTIC(NumInOutArgCandidates, "Number of in/out argument candidates found");
+STATISTIC(NumInOutArgPromoted, "Number of in/out arguments promoted");
+
+// When searching for a clobber for an argument we constrain the number of
+// expensive uncached MSSA walks.
+static cl::opt<unsigned> MaxMSSAWalksNum(
+    "argpromo-mssa-walks-limit", cl::Hidden, cl::init(10000),
+    cl::desc(
+        "Function argument promotion pass: the maximum number of MSSA walks"
+        " per argument on a clobber search (default = 10000)"));
+
+// Return a dot-prefixed string twine if S isn't empty (used for BBs' names).
+static inline Twine dot(const StringRef &S) {
+  return !S.empty() ? Twine('.') + S : Twine();
+}
+
+// Structure describing an argument for promotion.
+struct ArgPromotionInfo {
+  Argument *Arg;
+  Type *ArgType;
+  Align ArgAlign;
+  uint32_t Preload : 1; // Argument requires its initial value to be passed to
+                        // the function.
+  uint32_t Return : 1;  // Argument's value should be returned by the function.
+
+  // When the argument is promoted we need a new argument for the incoming
+  // preloaded value, but the new function signature isn't known yet and
+  // therefore isn't created. We use a dummy argument to start with, and after
+  // the new function is created it's RAUWed with the function's argument,
+  // see createNewFunction.
+  std::unique_ptr<Argument> PreloadArgDummy;
+
+  // Index of the value in the aggregated return type (insert/extract_value
+  // index).
+  unsigned ReturnValueIndex = (unsigned)-1;
+
+  // If one candidate clobbers another, this field denotes the relationship.
+  // Used to find a "declobbering" promotion sequence.
+  ArgPromotionInfo *ClobberedBy = nullptr;
+
+  AAMDNodes AAMD; // Merged AA metadata for the loads/stores.
+
+  ArgPromotionInfo(Argument *Arg_ = nullptr, Type *ArgType_ = nullptr,
+                   Align ArgAlign_ = Align())
+      : Arg(Arg_), ArgType(ArgType_), ArgAlign(ArgAlign_) {
+    Preload = Return = 0;
+  }
+
+  unsigned getArgNo() const { return Arg->getArgNo(); }
+
+  bool isUnusedArg() const { return !Preload && !Return; }
+
+  // Return true if this argument is promoted.
+  bool isPromoted() const {
+    return PreloadArgDummy || ReturnValueIndex != (unsigned)-1;
+  }
+
+  // TODO: this is a placeholder for checking GEP indexes.
+  bool isMyPtr(Value *Ptr) const { return Ptr && Ptr == Arg; }
+
+  // Predicates returning true if the value is a load or store by this
+  // argument (TODO: this will check GEPs later).
+  bool isMyLoad(Value *V) const {
+    LoadInst *LI = dyn_cast<LoadInst>(V);
+    return LI ? isMyPtr(LI->getPointerOperand()) : false;
+  }
+  bool isMyStore(Value *V) const {
+    StoreInst *SI = dyn_cast<StoreInst>(V);
+    return SI ? isMyPtr(SI->getPointerOperand()) : false;
+  }
+  bool isMyLoadOrStore(Value *V) const {
+    if (LoadInst *LI = dyn_cast<LoadInst>(V))
+      return isMyPtr(LI->getPointerOperand());
+    if (StoreInst *SI = dyn_cast<StoreInst>(V))
+      return isMyPtr(SI->getPointerOperand());
+    return false;
+  }
+
+  MemoryLocation getMemLoc() const {
+    const auto &DL = Arg->getParent()->getParent()->getDataLayout();
+    return MemoryLocation(Arg,
+                          LocationSize::precise(DL.getTypeStoreSize(ArgType)));
+  }
+
+  bool isClobberedBy(const ArgPromotionInfo &A) const {
+    const ArgPromotionInfo *P = this;
+    while ((P = P->ClobberedBy)) {
+      if (&A == P)
+        return true;
+    }
+    return false;
+  }
+
+  Twine getParamName(StringRef &&LifeTimeOwner = StringRef()) const {
+    // The problem with a twine is that the StringRef it references should be
+    // alive while the twine is alive: use LifeTimeOwner to keep the StringRef
+    // alive at least for the lifetime of the full expression.
+    LifeTimeOwner = Arg->getName();
+    return LifeTimeOwner + ".0.val";
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  LLVM_DUMP_METHOD StringRef getKindStr() const {
+    if (Preload && Return)
+      return "inout";
+    return Preload ? "in" : "out";
+  }
+#endif
"in" : "out"; + } +#endif + + Argument *getOrCreatePreloadArgDummy() { + if (!PreloadArgDummy) + PreloadArgDummy = std::make_unique(ArgType); + return PreloadArgDummy.get(); + } + + LoadInst *createLoad(IRBuilder &IRB, Value *Ptr, + const StringRef &Name) const { + LoadInst *Load = + IRB.CreateAlignedLoad(ArgType, Ptr, ArgAlign, Name + ".val"); + if (AAMD) + Load->setAAMetadata(AAMD); + return Load; + } + + StoreInst *createStore(IRBuilder &IRB, Value *V, Value *Ptr) const { + StoreInst *Store = IRB.CreateAlignedStore(V, Ptr, ArgAlign); + if (AAMD) + Store->setAAMetadata(AAMD); + return Store; + } + + // Iterator to hide impl details on iterating promoted argument's users, + // espesially when GEPs added, by now - minimal trivial implementation. + class user_iterator + : public iterator_facade_base { + Argument::user_iterator ArgUserI; + friend struct ArgPromotionInfo; + user_iterator(const Argument::user_iterator &I) : ArgUserI(I) {} + + public: + value_type operator*() const { return *ArgUserI; } + user_iterator &operator++() { + ++ArgUserI; + return *this; + } + user_iterator operator++(int) { + auto R = *this; + ++ArgUserI; + return R; + } + bool operator==(const user_iterator &RHS) const { + return ArgUserI == RHS.ArgUserI; + } + }; + user_iterator user_begin() const { return user_iterator(Arg->user_begin()); } + user_iterator user_end() const { return user_iterator(Arg->user_end()); } + iterator_range users() const { + return make_range(user_begin(), user_end()); + } +}; + +// Return true if Pred is true for all callers passing P.Arg. +static bool allCallersPass( + const ArgPromotionInfo &P, + function_ref Pred) { + Function *Callee = P.Arg->getParent(); + for (User *U : Callee->users()) { + assert(isa(U)); + CallBase *CB = cast(U); + if (!Pred(CB, CB->getArgOperand(P.getArgNo()), P)) + return false; + } + return true; +} + +// Given the function pointer argument that is only used by loads +// return true if the value pointed by the argument can be loaded before the +// function call and passed in: +// either the value is loaded by the ptr arg on every function path +// or the pointer is valid for all callsites in the program. +static bool isROCandidate(ArgPromotionInfo &Candidate) { + SmallPtrSet ReadPerBB; + for (Value *U : Candidate.users()) { + assert(Candidate.isMyLoad(U)); + ReadPerBB.insert(cast(U)->getParent()); + } + bool HasLoadOnEveryPath = true; + Function *F = Candidate.Arg->getParent(); + auto *EntryBB = &F->getEntryBlock(); + for (auto DFI = df_begin(EntryBB), E = df_end(EntryBB); DFI != E;) { + BasicBlock *BB = *DFI; + if (ReadPerBB.count(BB)) { + DFI.skipChildren(); // This path already have load - skipping children. + continue; + } + if (isa(BB->getTerminator())) { + HasLoadOnEveryPath = false; + break; + } + ++DFI; + } + + // Return true if we can prove that caller pass in a valid pointer. + const DataLayout &DL = F->getParent()->getDataLayout(); + auto IsValidPtr = [&DL](Value *A, const ArgPromotionInfo &P) -> bool { + return isDereferenceableAndAlignedPointer(A, P.ArgType, P.ArgAlign, DL); + }; + + Candidate.Preload = + HasLoadOnEveryPath || + // Check if the argument itself is marked dereferenceable and aligned, + IsValidPtr(Candidate.Arg, Candidate) || + // or this is true for all the callers + allCallersPass(Candidate, [&IsValidPtr](CallBase *, Value *A, + const ArgPromotionInfo &P) { + return IsValidPtr(A, P); + }); + + LLVM_DEBUG(dbgs() << " - "; + if (HasLoadOnEveryPath) dbgs() << "has a load on every path,"; + else dbgs() + << (Candidate.Preload ? 
"" : "not") + << " all callers pass a valid and aligned dereferenceable ptr,"); + + return Candidate.Preload; +} + +// Given the function pointer argument that is only used by stores and maybe +// loads return true if the value pointed by the argument can be stored after +// and loaded before the function call and passed in/returned by the function: +// either the value is stored on every function path +// or the pointer points to a thread local memory that doesn't escape before +// the function call for every callsite in the program. +// Check is made if a load precedes stores on any path so the initial value +// should be passed in as a parameter. +static bool isRWCandidate(FunctionAnalysisManager &FAM, + ArgPromotionInfo &Candidate) { + SmallDenseMap RWPerBB; + enum { HasReads = 1, HasWrites = 2 }; + for (Value *U : Candidate.users()) { + assert(Candidate.isMyLoadOrStore(U)); + RWPerBB[cast(U)->getParent()] |= + isa(U) ? HasReads : HasWrites; + } + bool HasLoadBeforeStore = false; + bool HasStoreOnEveryPath = true; + Function *F = Candidate.Arg->getParent(); + auto *EntryBB = &F->getEntryBlock(); + for (auto DFI = df_begin(EntryBB), E = df_end(EntryBB); DFI != E;) { + BasicBlock *BB = *DFI; + auto RW = RWPerBB.find(BB); + if (RW != RWPerBB.end()) { // There is load or store within the BB. + if (!HasLoadBeforeStore && (RW->second & HasReads)) { + if (RW->second & HasWrites) { + // Determine if load locally dominates store. + auto LorS = find_if(*BB, [&Candidate](Instruction &I) -> bool { + return Candidate.isMyLoadOrStore(&I); + }); + assert(LorS != BB->end()); + HasLoadBeforeStore = isa(*LorS); + } else + HasLoadBeforeStore = true; + } + if (RW->second & HasWrites) { + DFI.skipChildren(); // This path already have store - skipping children. + continue; + } + } + if (isa(BB->getTerminator())) + HasStoreOnEveryPath = false; + + // Short-circuit: all the info is collected - nothing left to do. + if (HasLoadBeforeStore && !HasStoreOnEveryPath) + break; + ++DFI; + } + + auto ValidThreadLocalPtr = [&FAM, F](CallBase *CallInst, Value *ActualPtr, + const ArgPromotionInfo &P) { + Value *Object = getUnderlyingObject(ActualPtr); + if (!isa(Object) && + !isAllocLikeFn(Object, &FAM.getResult(*F))) + return false; + + return !PointerMayBeCapturedBefore( + Object, /* ReturnCaptures */ false, + /* StoreCaptures */ true, CallInst, + &FAM.getResult(*F)); + }; + + if (HasStoreOnEveryPath) { + Candidate.Preload = HasLoadBeforeStore; + Candidate.Return = true; + LLVM_DEBUG(dbgs() << " - has store on every path,"); + } else { + // Preload the value so it can be returned unchanged on some path. + Candidate.Preload = Candidate.Return = + allCallersPass(Candidate, ValidThreadLocalPtr); + LLVM_DEBUG(dbgs() << " - " << (Candidate.Return ? "" : "not") + << " all callers pass a valid thread local ptr,"); + } + return Candidate.Return; +} + +// Fill Candidates with the list of arguments potentially suitable for promotion +static bool +getPromotionCandidates(FunctionAnalysisManager &FAM, Argument *PtrArg, + SmallVectorImpl &Candidates, + bool InArgsOnly) { + LLVM_DEBUG(dbgs() << " Trying arg: " << *PtrArg); + + unsigned NumLoads = 0, NumStores = 0; + Type *ValueTy = nullptr; + Align ArgAlign; // Receives max alignment among the instructions. 
+
+// Fill Candidates with the list of arguments potentially suitable for
+// promotion.
+static bool
+getPromotionCandidates(FunctionAnalysisManager &FAM, Argument *PtrArg,
+                       SmallVectorImpl<ArgPromotionInfo> &Candidates,
+                       bool InArgsOnly) {
+  LLVM_DEBUG(dbgs() << "  Trying arg: " << *PtrArg);
+
+  unsigned NumLoads = 0, NumStores = 0;
+  Type *ValueTy = nullptr;
+  Align ArgAlign; // Receives the max alignment among the instructions.
+  for (auto *U : PtrArg->users()) {
+    Type *InstType = nullptr;
+    Align InstAlign;
+    if (auto *LI = dyn_cast<LoadInst>(U)) {
+      if (LI->isSimple()) {
+        InstType = LI->getType();
+        InstAlign = LI->getAlign();
+        ++NumLoads;
+      }
+    } else if (auto *SI = dyn_cast<StoreInst>(U)) {
+      if (SI->isSimple() && SI->getValueOperand() != PtrArg && !InArgsOnly) {
+        InstType = SI->getValueOperand()->getType();
+        InstAlign = SI->getAlign();
+        ++NumStores;
+      }
+    }
+    if (!InstType) {
+      LLVM_DEBUG(dbgs() << " - unsupported use " << *U << '\n');
+      return false;
+    }
+    if (!ValueTy) {
+      if (!InstType->isSingleValueType()) {
+        LLVM_DEBUG(dbgs() << " - unsupported type " << *InstType << '\n');
+        return false;
+      }
+      ValueTy = InstType;
+    } else if (ValueTy != InstType) {
+      LLVM_DEBUG(dbgs() << " - loads/stores don't agree on the type " << *U
+                        << '\n');
+      return false;
+    }
+    if (NumStores && PtrArg->hasByValAttr()) { // Skip mutable byval.
+      LLVM_DEBUG(dbgs() << " - byval has store " << *U << '\n');
+      return false;
+    }
+    ArgAlign = std::max(ArgAlign, InstAlign);
+  }
+
+  Candidates.emplace_back(PtrArg, ValueTy, ArgAlign);
+  if (NumLoads + NumStores) {
+    auto &C = Candidates.back();
+    if (!(NumStores ? isRWCandidate(FAM, C) : isROCandidate(C))) {
+      Candidates.pop_back();
+      LLVM_DEBUG(dbgs() << " discard\n");
+      return false;
+    }
+    LLVM_DEBUG(dbgs() << " promote as " << C.getKindStr() << " arg\n");
+  } else {
+    // Otherwise this is a useless argument to get rid of later.
+    LLVM_DEBUG(dbgs() << " - unused arg, remove\n");
+  }
+  return true;
+}
+
+class ArgumentPromoter {
+  Function *F;
+  FunctionAnalysisManager &FAM;
+  MemorySSA &MSSA;
+  unsigned NumMSSAWalksLeft;
+  SmallPtrSet<MemoryAccess *, 16> VisitedMA;
+
+  enum ClobberTestResult {
+    CheckOtherPhiPath,
+    ContinueThisPhiPath,
+    FoundClobber
+  };
+  using ClobberTestFx = ClobberTestResult(MemoryAccess *);
+
+  MemoryAccess *getClobber(MemoryAccess *MA, const MemoryLocation &Loc,
+                           function_ref<ClobberTestFx> ClobberTest,
+                           SmallPtrSetImpl<MemoryAccess *> &Visited);
+
+  MemoryAccess *getClobber(Instruction *I,
+                           function_ref<ClobberTestFx> ClobberTest,
+                           SmallPtrSetImpl<MemoryAccess *> &Visited);
+
+  MemoryAccess *getInOutArgClobber(const ArgPromotionInfo &ArgInfo);
+
+  using RetValuesMap =
+      SmallDenseMap<ReturnInst *, SmallVector<TrackingVH<Value>, 4>>;
+  void promoteInOutArg(ArgPromotionInfo &ArgInfo, RetValuesMap &RetValues);
+
+  Type *promoteInOutCandidates(
+      SmallVectorImpl<ArgPromotionInfo> &Candidates,
+      SmallVectorImpl<const ArgPromotionInfo *> &RetValuesStoreOrder);
+
+  bool isInArgClobbered(const ArgPromotionInfo &ArgInfo);
+  void promoteInArg(ArgPromotionInfo &ArgInfo);
+
+  static Function *
+  createNewFunction(Function *OldF, Type *RetTy,
+                    const SmallVectorImpl<ArgPromotionInfo *> &PromotedArgs);
+
+  static void promoteCallsite(
+      CallBase &CB, Function *NF,
+      const SmallVectorImpl<ArgPromotionInfo *> &PromotedArgs,
+      const SmallVectorImpl<const ArgPromotionInfo *> &RetValuesStoreOrder);
+
+public:
+  ArgumentPromoter(Function *F_, FunctionAnalysisManager &FAM_)
+      : F(F_), FAM(FAM_), MSSA(FAM.getResult<MemorySSAAnalysis>(*F).getMSSA()) {
+  }
+
+  Function *run(SmallVectorImpl<ArgPromotionInfo> &Candidates);
+};
+
+// Search for a memory access that clobbers Loc starting from MA. Does a BFS
+// over phi paths. ClobberTest is run on every clobber found to decide how to
+// proceed, according to its return value:
+// FoundClobber - stop the search and return the found clobber;
+// ContinueThisPhiPath - skip the found clobber and continue searching this
+// path;
+// CheckOtherPhiPath - skip the found clobber and try other phi paths if any.
+// Return the found clobber, the LiveOnEntry def if there is no clobber, or
+// nullptr if the maximum number of uncached MSSA walks has been reached.
+MemoryAccess *
+ArgumentPromoter::getClobber(MemoryAccess *MA, const MemoryLocation &Loc,
+                             function_ref<ClobberTestFx> ClobberTest,
+                             SmallPtrSetImpl<MemoryAccess *> &Visited) {
+  std::deque<MemoryAccess *> FIFO;
+  do {
+    while (true) {
+      if (!Visited.insert(MA).second)
+        break;
+      if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(MA)) {
+        for (auto *DefMA : make_range(Phi->defs_begin(), Phi->defs_end()))
+          FIFO.push_back(DefMA);
+        break;
+      }
+      if (--NumMSSAWalksLeft == 0) // Constrain the number of uncached walks.
+        return nullptr;
+      auto *ClobberMA = MSSA.getWalker()->getClobberingMemoryAccess(MA, Loc);
+      if (isa<MemoryPhi>(ClobberMA)) {
+        MA = ClobberMA;
+      } else if (!MSSA.isLiveOnEntryDef(ClobberMA)) {
+        ClobberTestResult R = ClobberTest(ClobberMA);
+        if (R == FoundClobber)
+          return ClobberMA;
+        else if (R == ContinueThisPhiPath)
+          MA = cast<MemoryDef>(ClobberMA)->getDefiningAccess();
+        else
+          break; // CheckOtherPhiPath
+      }
+    }
+    if (FIFO.empty())
+      break;
+    MA = FIFO.front();
+    FIFO.pop_front();
+  } while (true);
+  return MSSA.getLiveOnEntryDef();
+}
+
+// Similar to the previous routine, but searches for a memory access that
+// clobbers the memory accessed by the instruction I.
+MemoryAccess *
+ArgumentPromoter::getClobber(Instruction *I,
+                             function_ref<ClobberTestFx> ClobberTest,
+                             SmallPtrSetImpl<MemoryAccess *> &Visited) {
+  assert(MemoryLocation::getOrNone(I).hasValue());
+  auto *ClobberMA = MSSA.getWalker()->getClobberingMemoryAccess(I);
+  if (MSSA.isLiveOnEntryDef(ClobberMA))
+    return ClobberMA;
+  if (isa<MemoryPhi>(ClobberMA))
+    return getClobber(ClobberMA, MemoryLocation::get(I), ClobberTest, Visited);
+
+  switch (ClobberTest(ClobberMA)) {
+  case FoundClobber:
+    return ClobberMA;
+  case CheckOtherPhiPath:
+    break; // No other path to test.
+  case ContinueThisPhiPath:
+    return getClobber(cast<MemoryDef>(ClobberMA)->getDefiningAccess(),
+                      MemoryLocation::get(I), ClobberTest, Visited);
+  }
+  return MSSA.getLiveOnEntryDef();
+}
+
+// TODO: move this to the MemorySSA class.
+// Find the last memory def or phi in the BB or in its dominating
+// predecessors. Note that a def in a non-dominating predecessor would create
+// a phi in the BB.
+static MemoryAccess *getLastDef(BasicBlock *BB, MemorySSA &MSSA) {
+  if (auto *Defs = MSSA.getBlockDefs(BB))
+    return const_cast<MemoryAccess *>(&*Defs->rbegin());
+
+  DomTreeNode *Node = MSSA.getDomTree().getNode(BB);
+  while ((Node = Node->getIDom()))
+    if (auto *Defs = MSSA.getBlockDefs(Node->getBlock()))
+      return const_cast<MemoryAccess *>(&*Defs->rbegin());
+  return MSSA.getLiveOnEntryDef();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD
+static void printClobber(raw_ostream &os, MemoryAccess *ClobberMA,
+                         Instruction *I) {
+  if (!ClobberMA) {
+    os << "clobber search reached its limit\n";
+    return;
+  }
+  auto *ClobberI = cast<MemoryUseOrDef>(ClobberMA)->getMemoryInst();
+  os << "found clobber:" << *I << '@' << I->getParent()->getName()
+     << " is clobbered by" << *ClobberI << '@'
+     << ClobberI->getParent()->getName() << '\n';
+}
+#endif
+
+// Check if memops through the argument are clobbered by, or clobber, other
+// memops. Return the found clobber, the LiveOnEntry def if there is no
+// clobber, or nullptr if the maximum number of uncached MSSA walks has been
+// reached.
+MemoryAccess *
+ArgumentPromoter::getInOutArgClobber(const ArgPromotionInfo &ArgInfo) {
+  LLVM_DEBUG(dbgs() << "  Searching for a clobber for " << ArgInfo.getKindStr()
+                    << " arg " << *ArgInfo.Arg << ": ");
+  auto SkipMyStore = [&ArgInfo](MemoryAccess *MA) -> ClobberTestResult {
+    return ArgInfo.isMyStore(cast<MemoryDef>(MA)->getMemoryInst())
+               ? CheckOtherPhiPath
+               : FoundClobber;
+  };
+  VisitedMA.clear(); // Using VisitedMA to track SkipMyStore condition tests.
+  // Check if a load by the argument is clobbered by something other than
+  // a store by the argument.
+  for (Value *U : ArgInfo.users()) {
+    assert(ArgInfo.isMyLoadOrStore(U));
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      auto *Clob = getClobber(LI, SkipMyStore, VisitedMA);
+      if (!MSSA.isLiveOnEntryDef(Clob)) {
+        LLVM_DEBUG(printClobber(dbgs(), Clob, LI));
+        return Clob;
+      }
+    }
+  }
+  // Check if the argument has been clobbered between the last store by the
+  // arg and a return on any path.
+  MemoryLocation Loc(ArgInfo.getMemLoc());
+  for (auto &BB : *F) {
+    if (!isa<ReturnInst>(BB.getTerminator()))
+      continue;
+    auto *Clob = getClobber(getLastDef(&BB, MSSA), Loc, SkipMyStore, VisitedMA);
+    if (!MSSA.isLiveOnEntryDef(Clob)) {
+      LLVM_DEBUG(printClobber(dbgs(), Clob, BB.getTerminator()));
+      return Clob;
+    }
+  }
+  // Check if any other load is clobbered by a store by the argument.
+  AliasAnalysis &AA = FAM.getResult<AAManager>(*F);
+  for (auto &BB : *F) {
+    if (auto *L = MSSA.getBlockAccesses(&BB)) {
+      for (auto &MA : *L) {
+        if (auto *MU = dyn_cast<MemoryUse>(&MA)) {
+          Instruction *UseI = MU->getMemoryInst();
+          if (ArgInfo.isMyLoad(UseI))
+            continue;
+          auto UseLoc = MemoryLocation::getOrNone(UseI);
+          if (!UseLoc.hasValue()) {
+            LLVM_DEBUG(dbgs() << "cannot get memloc for " << *UseI << '\n');
+            // Conservatively consider this a clobber.
+            return const_cast<MemoryUse *>(MU);
+          }
+          auto FindMyStore = [&](MemoryAccess *MA) -> ClobberTestResult {
+            Instruction *DefI = cast<MemoryDef>(MA)->getMemoryInst();
+            if (ArgInfo.isMyStore(DefI))
+              return FoundClobber;
+            // If the UseI's location is definitely overwritten by the clobber
+            // we can skip this path; otherwise it can be clobbered earlier.
+            ModRefInfo MRI = AA.getModRefInfo(DefI, UseLoc);
+            return (isMustSet(MRI) && isModSet(MRI)) ? CheckOtherPhiPath
+                                                     : ContinueThisPhiPath;
+          };
+          VisitedMA.clear();
+          auto *Clob = getClobber(UseI, FindMyStore, VisitedMA);
+          if (!MSSA.isLiveOnEntryDef(Clob)) {
+            LLVM_DEBUG(printClobber(dbgs(), Clob, UseI));
+            return Clob;
+          }
+        }
+      }
+    }
+  }
+  LLVM_DEBUG(dbgs() << "no clobber\n");
+  return MSSA.getLiveOnEntryDef();
+}
+
+// Annotate each return instruction with a value for the argument ArgInfo.
+// Creates phis and rewrites the code.
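+//
+// For illustration, given a hypothetical in/out argument %p in
+//
+//     br i1 %c, label %t, label %join
+//   t:
+//     store i32 1, i32* %p
+//     br label %join
+//   join:
+//     %v = load i32, i32* %p
+//
+// the load is replaced by a phi of the stored value (from %t) and the
+// preloaded incoming value (from the entry), and that phi also becomes the
+// value annotated on the reachable returns.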
+void ArgumentPromoter::promoteInOutArg(ArgPromotionInfo &ArgInfo,
+                                       RetValuesMap &RetValues) {
+  SmallDenseMap<BasicBlock *, SmallVector<Instruction *, 4>, 16> MemInsts;
+  SmallPtrSet<BasicBlock *, 16> DefBB;
+  for (Value *U : ArgInfo.users()) {
+    assert(ArgInfo.isMyLoadOrStore(U));
+
+    Instruction *I = cast<Instruction>(U);
+    if (MemInsts.empty())
+      ArgInfo.AAMD = I->getAAMetadata();
+    else if (ArgInfo.AAMD) // Merging AA metadata along the way.
+      ArgInfo.AAMD.merge(I->getAAMetadata());
+
+    BasicBlock *BB = I->getParent();
+    if (isa<StoreInst>(I))
+      DefBB.insert(BB);
+    MemInsts[BB].push_back(I);
+  }
+
+  SmallDenseMap<BasicBlock *, TrackingVH<Value>, 16> BBExitValue;
+
+  // Processing stores.
+  for (BasicBlock *BB : DefBB) {
+    auto &BBMemInsts = MemInsts[BB];
+    // Sort the memory instructions in program order.
+    sort(BBMemInsts, [this](Instruction *A, Instruction *B) {
+      return MSSA.locallyDominates(MSSA.getMemoryAccess(A),
+                                   MSSA.getMemoryAccess(B));
+    });
+    // Propagate store values down to the end of the basic block;
+    // loads preceding the first store will be processed later.
+    auto FirstStore =
+        find_if(BBMemInsts, [](Instruction *I) { return isa<StoreInst>(I); });
+    assert(FirstStore != BBMemInsts.end());
+    Value *V = nullptr;
+    for (Instruction *I : make_range(FirstStore, BBMemInsts.end())) {
+      if (isa<LoadInst>(I)) {
+        assert(V); // Since we started with a store.
+        I->replaceAllUsesWith(V);
+      } else
+        V = cast<StoreInst>(I)->getValueOperand();
+    }
+    assert(V);
+    BBExitValue[BB] = V;
+  }
+
+  SmallDenseMap<BasicBlock *, TrackingVH<Value>, 16> BBEntryValue;
+  auto setEntryValue = [&](BasicBlock *BB, Value *V) {
+    BBEntryValue[BB] = V;
+    // Keep the BBExitValue left from store processing.
+    BBExitValue.try_emplace(BB, V);
+  };
+
+  if (ArgInfo.Preload)
+    setEntryValue(&F->getEntryBlock(), ArgInfo.getOrCreatePreloadArgDummy());
+
+  { // Inserting phis.
+    SmallVector<BasicBlock *, 32> PHIBlocks;
+    ForwardIDFCalculator IDF(MSSA.getDomTree());
+    IDF.setDefiningBlocks(DefBB);
+    IDF.calculate(PHIBlocks);
+
+    for (auto *JoinBB : PHIBlocks) {
+      auto P = MemInsts.find(JoinBB);
+      // If JoinBB starts with a store then the phi value isn't used.
+      if (P == MemInsts.end() || isa<LoadInst>(P->second.front())) {
+        PHINode *Phi = PHINode::Create(ArgInfo.ArgType, 2,
+                                       ArgInfo.getParamName() +
+                                           dot(JoinBB->getName()) + ".phi",
+                                       &JoinBB->front());
+        setEntryValue(JoinBB, Phi);
+      }
+    }
+  }
+
+  auto findIncomingValue = [&](BasicBlock *BB) -> Value * {
+    DomTreeNode *Node = MSSA.getDomTree().getNode(BB);
+    while ((Node = Node->getIDom())) {
+      auto I = BBExitValue.find(Node->getBlock());
+      if (I != BBExitValue.end())
+        return I->second;
+    }
+    return UndefValue::get(ArgInfo.ArgType);
+  };
+
+  auto getBBExitValue = [&](BasicBlock *BB) -> Value * {
+    auto I = BBExitValue.find(BB);
+    if (I != BBExitValue.end())
+      return I->second;
+    return findIncomingValue(BB);
+  };
+
+  auto getBBEntryValue = [&](BasicBlock *BB) -> Value * {
+    auto I = BBEntryValue.find(BB);
+    if (I != BBEntryValue.end())
+      return I->second;
+    return findIncomingValue(BB);
+  };
+
+  // Processing phis.
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  for (auto &P : BBEntryValue)
+    if (PHINode *Phi = dyn_cast<PHINode>(&*P.second)) {
+      for (BasicBlock *PredBB : predecessors(P.first))
+        Phi->addIncoming(getBBExitValue(PredBB), PredBB);
+
+      if (Value *V = SimplifyInstruction(Phi, DL)) {
+        Phi->replaceAllUsesWith(V);
+        Phi->eraseFromParent();
+      }
+    }
+
+  // Processing loads.
+  for (auto &P : MemInsts) {
+    auto &BBMemInsts = P.second;
+    if (!isa<LoadInst>(BBMemInsts.front()))
+      continue;
+    Value *V = getBBEntryValue(P.first);
+    auto I = BBMemInsts.begin(), E = BBMemInsts.end();
+    do {
+      (*I)->replaceAllUsesWith(V);
+    } while (++I != E && isa<LoadInst>(*I));
+  }
+
+  // Annotate returns.
+  for (BasicBlock &BB : *F)
+    if (auto *RetInst = dyn_cast<ReturnInst>(BB.getTerminator()))
+      RetValues[RetInst].push_back(getBBExitValue(&BB));
+
+  // Finally, erase the loads and stores.
+  MemorySSAUpdater UMSSA(&MSSA);
+  for (Value *U : make_early_inc_range(ArgInfo.users())) {
+    assert(ArgInfo.isMyLoadOrStore(U));
+    UMSSA.removeMemoryAccess(cast<Instruction>(U));
+    cast<Instruction>(U)->eraseFromParent();
+  }
+#ifndef NDEBUG
+  MSSA.verifyMemorySSA();
+#endif
+}
+
+// Tries to promote [input/]output ptr arguments. It may happen that the store
+// instructions for several arguments clobber one another; to solve this, an
+// attempt is made to find an "unclobbering" promotion sequence. For example:
+//   store PtrArgA(may alias), 1;
+//   store PtrArgB(may alias), 0; <- clobbers the store to PtrArgA
+//
+// First PtrArgB is promoted, unclobbering PtrArgA, which is promoted second.
+// Notice that this is only possible if such stores obey the same order in
+// every basic block, otherwise we cannot unclobber them at all. The promoted
+// stores are then placed in the caller in the same order, making the
+// transformation safe.
+//
+// This could be left to the following passes, but it's better to perform such
+// unclobbering all at once, not only for compilation speed: it also allows us
+// to simplify the return value of the function. Otherwise we would have to
+// deal with an onion-like aggregated return type with a bulky INSERT_VALUE/
+// EXTRACT_VALUE sequence.
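+//
+// Continuing the example above with hypothetical IR: after PtrArgB and then
+// PtrArgA are promoted, the caller extracts both components and replays the
+// stores in the original program order:
+//
+//   %r = call { i32, i32 } @f()
+//   %a = extractvalue { i32, i32 } %r, 0
+//   %b = extractvalue { i32, i32 } %r, 1
+//   store i32 %a, i32* %A   ; original store order preserved:
+//   store i32 %b, i32* %B   ; A's store first, then B's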
+Type *ArgumentPromoter::promoteInOutCandidates(
+    SmallVectorImpl<ArgPromotionInfo> &Candidates,
+    SmallVectorImpl<const ArgPromotionInfo *> &RetValuesStoreOrder) {
+
+  // The priority queue is ordered so that clobbered candidates pop last.
+  struct ClobberedPopLast {
+    // Returns true if its first argument comes before its second argument in
+    // a weak ordering. But because the priority queue outputs the largest
+    // elements first, the elements that "come before" are actually output
+    // last.
+    bool operator()(const ArgPromotionInfo *A1,
+                    const ArgPromotionInfo *A2) const {
+      assert(!A1->isClobberedBy(*A2) || !A2->isClobberedBy(*A1));
+      return A1->isClobberedBy(*A2);
+    }
+  };
+  struct CandidateQueue
+      : std::priority_queue<ArgPromotionInfo *, std::vector<ArgPromotionInfo *>,
+                            ClobberedPopLast> {
+    CandidateQueue(SmallVectorImpl<ArgPromotionInfo> &Candidates) {
+      // This might seem like a dirty hack, but until ClobberedBy is set no
+      // order on the candidates can be established, so just store them as is.
+      for (auto &C : Candidates) {
+        if (C.Return) {
+          assert(!C.ClobberedBy); // But let's be careful.
+          c.push_back(&C);
+        }
+      }
+    }
+    // This is placed here because the priority_queue container is protected.
+    ArgPromotionInfo *findClobber(StoreInst *SI) const {
+      auto Clobber =
+          std::find_if(c.begin(), c.end(), [SI](const ArgPromotionInfo *A) {
+            return A->isMyStore(SI);
+          });
+      return Clobber != c.end() ? *Clobber : nullptr;
+    }
+  } Queue(Candidates);
+
+  RetValuesMap RetValues;
+  unsigned NumPromoted = 0;
+  while (!Queue.empty()) {
+    ArgPromotionInfo &C = *Queue.top();
+    Queue.pop();
+    if (C.ClobberedBy && !C.ClobberedBy->isPromoted()) // [1]
+      continue; // The clobber isn't gone.
+    MemoryAccess *ClobberMA = getInOutArgClobber(C);
+    if (MSSA.isLiveOnEntryDef(ClobberMA)) {
+      promoteInOutArg(C, RetValues);
+      // ReturnValueIndex is used as the index of the arg's value in the map
+      // up until this function's exit, see below.
+      C.ReturnValueIndex = NumPromoted++;
+      continue;
+    }
+    MemoryDef *MDef = dyn_cast_or_null<MemoryDef>(ClobberMA);
+    if (!MDef)
+      continue;
+    // If the clobbering store belongs to another candidate in the queue,
+    // enqueue the current candidate back with ClobberedBy set so we can
+    // retry it after the clobbering candidate has been promoted.
+    StoreInst *SI = dyn_cast<StoreInst>(MDef->getMemoryInst());
+    if (!SI || !SI->isSimple() || C.isMyStore(SI))
+      continue;
+    if (ArgPromotionInfo *Clobber = Queue.findClobber(SI)) {
+      C.ClobberedBy = Clobber;
+      if (!Clobber->isClobberedBy(C))
+        Queue.push(&C);
+      // Otherwise this is a circular dependency; the other candidates will
+      // be removed by the condition [1].
+    }
+  }
+
+  Type *OldRetTy = F->getReturnType();
+  if (!NumPromoted)
+    return OldRetTy;
+
+  SmallVector<Type *, 4> ReturnArgTypes;
+  ReturnArgTypes.reserve(NumPromoted + 1);
+  if (!OldRetTy->isVoidTy())
+    ReturnArgTypes.push_back(OldRetTy);
+
+  SmallVector<ArgPromotionInfo *, 4> ReturnArgs;
+  ReturnArgs.reserve(NumPromoted);
+  for (ArgPromotionInfo &C : Candidates) {
+    if (C.isPromoted()) {
+      assert(C.Return);
+      ReturnArgs.push_back(&C);
+      ReturnArgTypes.push_back(C.ArgType);
+    }
+  }
+
+  Type *RetTy = ReturnArgTypes.size() > 1
+                    ? StructType::get(F->getContext(), ReturnArgTypes)
+                    : ReturnArgTypes.front();
+
+  // Replace the old return instructions using the annotated return values.
+  for (auto &P : RetValues) {
+    ReturnInst *OldRetInst = P.first;
+    const auto &Values = P.second;
+    assert(Values.size() == NumPromoted);
+    Value *RetValue;
+    if (OldRetTy->isVoidTy() && NumPromoted == 1)
+      RetValue = Values[0];
+    else {
+      SmallString<256> NameData;
+      StringRef Name =
+          (F->getName() + dot(OldRetInst->getParent()->getName()) + ".ret")
+              .toStringRef(NameData);
+      RetValue = UndefValue::get(RetTy);
+      unsigned I = 0;
+      if (!OldRetTy->isVoidTy()) {
+        RetValue = InsertValueInst::Create(
+            RetValue, OldRetInst->getReturnValue(), {I++}, Name, OldRetInst);
+      }
+      for (const ArgPromotionInfo *C : ReturnArgs) {
+        RetValue =
+            InsertValueInst::Create(RetValue, Values[C->ReturnValueIndex], {I},
+                                    Name + Twine(I), OldRetInst);
+        ++I;
+      }
+    }
+    ReturnInst::Create(OldRetInst->getContext(), RetValue, OldRetInst);
+    OldRetInst->eraseFromParent();
+  }
+
+  RetValuesStoreOrder.resize(NumPromoted);
+  for (unsigned I = 0; I < NumPromoted; I++) {
+    ArgPromotionInfo *C = ReturnArgs[I];
+    RetValuesStoreOrder[NumPromoted - 1 - C->ReturnValueIndex] = C;
+    // ReturnValueIndex is now the index in the aggregated return type.
+    C->ReturnValueIndex = I + (OldRetTy->isVoidTy() ? 0 : 1);
+  }
+  return RetTy;
+}
+
+bool ArgumentPromoter::isInArgClobbered(const ArgPromotionInfo &ArgInfo) {
+  LLVM_DEBUG(dbgs() << "  Searching for a clobber for in arg " << *ArgInfo.Arg
+                    << ": ");
+  assert(!ArgInfo.Return && ArgInfo.Preload);
+  auto *Walker = MSSA.getWalker();
+  for (Value *U : ArgInfo.users()) {
+    assert(ArgInfo.isMyLoad(U));
+    LoadInst *LI = cast<LoadInst>(U);
+    auto *ClobberMA = Walker->getClobberingMemoryAccess(LI);
+    if (!MSSA.isLiveOnEntryDef(ClobberMA)) {
+      LLVM_DEBUG(printClobber(dbgs(), ClobberMA, LI));
+      return true;
+    }
+  }
+  LLVM_DEBUG(dbgs() << "no clobber\n");
+  return false;
+}
+
+void ArgumentPromoter::promoteInArg(ArgPromotionInfo &ArgInfo) {
+  assert(!ArgInfo.Return && ArgInfo.Preload);
+  MemorySSAUpdater UMSSA(&MSSA);
+  bool FirstAAMD = true;
+  for (Value *U : make_early_inc_range(ArgInfo.users())) {
+    assert(ArgInfo.isMyLoad(U));
+    LoadInst *LI = cast<LoadInst>(U);
+    if (FirstAAMD) {
+      ArgInfo.AAMD = LI->getAAMetadata();
+      FirstAAMD = false;
+    } else if (ArgInfo.AAMD)
+      ArgInfo.AAMD.merge(LI->getAAMetadata());
+    LI->replaceAllUsesWith(ArgInfo.getOrCreatePreloadArgDummy());
+    UMSSA.removeMemoryAccess(LI);
+    LI->eraseFromParent();
+  }
+#ifndef NDEBUG
+  MSSA.verifyMemorySSA();
+#endif
+}
+
+// Create the function with the new signature.
+Function *ArgumentPromoter::createNewFunction(
+    Function *OldF, Type *RetTy,
+    const SmallVectorImpl<ArgPromotionInfo *> &PromotedArgs) {
+
+  SmallVector<Type *, 8> Params;
+  SmallVector<AttributeSet, 8> ParamAttr;
+  AttributeList PAL = OldF->getAttributes();
+  auto PA = PromotedArgs.begin();
+  for (unsigned ArgNo = 0; ArgNo < OldF->arg_size(); ++ArgNo) {
+    if (PA != PromotedArgs.end() && (*PA)->getArgNo() == ArgNo) {
+      assert((*PA)->isPromoted() || (*PA)->isUnusedArg());
+      if ((*PA)->PreloadArgDummy) {
+        Params.push_back((*PA)->ArgType);
+        ParamAttr.push_back(AttributeSet());
+      }
+      ++PA;
+    } else {
+      Params.push_back(OldF->getArg(ArgNo)->getType());
+      ParamAttr.push_back(PAL.getParamAttrs(ArgNo));
+    }
+  }
+  assert(PA == PromotedArgs.end());
+
+  FunctionType *OldFTy = OldF->getFunctionType();
+  FunctionType *NFTy = FunctionType::get(RetTy, Params, OldFTy->isVarArg());
+  Function *NF = Function::Create(NFTy, OldF->getLinkage(),
+                                  OldF->getAddressSpace(), OldF->getName());
+  NF->copyAttributesFrom(OldF);
+  NF->copyMetadata(OldF, 0);
+  NF->setAttributes(AttributeList::get(OldF->getContext(), PAL.getFnAttrs(),
+                                       PAL.getRetAttrs(), ParamAttr));
+
+  // The new function will have the !dbg metadata copied from the original
+  // function. The original function may not be deleted, and dbg metadata
+  // needs to be unique, so we need to drop it.
+  OldF->setSubprogram(nullptr);
+  OldF->getParent()->getFunctionList().insert(OldF->getIterator(), NF);
+  NF->takeName(OldF);
+  NF->getBasicBlockList().splice(NF->begin(), OldF->getBasicBlockList());
+
+  auto NewArgI = NF->arg_begin();
+  PA = PromotedArgs.begin();
+  for (unsigned ArgNo = 0; ArgNo < OldF->arg_size(); ++ArgNo) {
+    Argument &OldArg = *OldF->getArg(ArgNo);
+    if (PA != PromotedArgs.end() && (*PA)->getArgNo() == ArgNo) {
+      assert((*PA)->isPromoted() || (*PA)->isUnusedArg());
+      if ((*PA)->PreloadArgDummy) {
+        (*PA)->PreloadArgDummy->replaceAllUsesWith(NewArgI);
+        NewArgI->setName((*PA)->getParamName());
+        // Replace potential metadata uses (like llvm.dbg.value) with undef.
+        OldArg.replaceAllUsesWith(UndefValue::get(OldArg.getType()));
+        ++NewArgI;
+      }
+      ++PA;
+    } else {
+      OldArg.replaceAllUsesWith(&*NewArgI);
+      NewArgI->takeName(&OldArg);
+      ++NewArgI;
+    }
+  }
+  assert(PA == PromotedArgs.end());
+  return NF;
+}
+
+// Promote a callsite to call the new function signature, inserting loads and
+// stores before and after the callsite.
+void ArgumentPromoter::promoteCallsite(
+    CallBase &CB, Function *NF,
+    const SmallVectorImpl<ArgPromotionInfo *> &PromotedArgs,
+    const SmallVectorImpl<const ArgPromotionInfo *> &RetValuesStoreOrder) {
+
+  SmallVector<Value *, 16> Args;
+  SmallVector<AttributeSet, 16> ArgsAttr;
+  const AttributeList &CallPAL = CB.getAttributes();
+  IRBuilder<NoFolder> IRB(&CB);
+  auto PA = PromotedArgs.begin();
+  for (unsigned ArgNo = 0; ArgNo < CB.arg_size(); ++ArgNo) {
+    Value *CallOp = CB.getArgOperand(ArgNo);
+    if (PA != PromotedArgs.end() && (*PA)->getArgNo() == ArgNo) {
+      assert((*PA)->isPromoted() || (*PA)->isUnusedArg());
+      if ((*PA)->PreloadArgDummy) {
+        Args.push_back((*PA)->createLoad(IRB, CallOp, CallOp->getName()));
+        ArgsAttr.push_back(AttributeSet());
+      }
+      ++PA;
+    } else {
+      Args.push_back(CallOp);
+      ArgsAttr.push_back(CallPAL.getParamAttrs(ArgNo));
+    }
+  }
+  assert(PA == PromotedArgs.end());
+
+  SmallVector<OperandBundleDef, 1> OpBundles;
+  CB.getOperandBundlesAsDefs(OpBundles);
+  CallBase *NewCS = nullptr;
+  if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
+    NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
+                               Args, OpBundles, "", &CB);
+  } else {
+    auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", &CB);
+    NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());
+    NewCS = NewCall;
+  }
+  NewCS->setCallingConv(CB.getCallingConv());
+  NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
+  NewCS->takeName(&CB);
+  NewCS->setAttributes(AttributeList::get(NF->getContext(),
+                                          CallPAL.getFnAttrs(),
+                                          CallPAL.getRetAttrs(), ArgsAttr));
+
+  if (RetValuesStoreOrder.empty()) {
+    CB.replaceAllUsesWith(NewCS);
+    return;
+  }
+
+  // Processing return values.
+  bool OldRetTyIsVoid = CB.getCalledFunction()->getReturnType()->isVoidTy();
+  if (OldRetTyIsVoid && RetValuesStoreOrder.size() == 1) {
+    const ArgPromotionInfo *A = RetValuesStoreOrder.front();
+    A->createStore(IRB, NewCS, CB.getArgOperand(A->getArgNo()));
+  } else {
+    if (!OldRetTyIsVoid && !CB.user_empty())
+      CB.replaceAllUsesWith(
+          IRB.CreateExtractValue(NewCS, {0}, NewCS->getName() + ".ret"));
+    for (const ArgPromotionInfo *A : RetValuesStoreOrder) {
+      Value *CallOp = CB.getArgOperand(A->getArgNo());
+      Value *RetVal = IRB.CreateExtractValue(NewCS, {A->ReturnValueIndex},
+                                             CallOp->getName() + ".val.ret");
+      A->createStore(IRB, RetVal, CallOp);
+    }
+  }
+}
+
+// Try to promote the function argument candidates and update the callsites.
+Function *ArgumentPromoter::run(SmallVectorImpl<ArgPromotionInfo> &Candidates) {
+  // Reload the MSSA uncached-walks constraint.
+  NumMSSAWalksLeft = MaxMSSAWalksNum * Candidates.size();
+
+  SmallVector<const ArgPromotionInfo *, 4> RetValuesStoreOrder;
+  Type *RetType = promoteInOutCandidates(Candidates, RetValuesStoreOrder);
+
+  SmallVector<ArgPromotionInfo *, 8> PromotedArgs;
+  for (ArgPromotionInfo &C : Candidates) {
+    if (C.Return) {
+      ++NumInOutArgCandidates;
+      if (C.isPromoted()) {
+        PromotedArgs.push_back(&C);
+        ++NumInOutArgPromoted;
+      }
+    } else if (C.Preload) {
+      ++NumInArgCandidates;
+      if (!isInArgClobbered(C)) {
+        promoteInArg(C);
+        PromotedArgs.push_back(&C);
+        ++NumInArgPromoted;
+      }
+    } else {
+      assert(C.isUnusedArg());
+      PromotedArgs.push_back(&C); // Will be removed from the func signature.
+    }
+  }
+
+  if (PromotedArgs.empty())
+    return nullptr;
+
+  Function *NF = createNewFunction(F, RetType, PromotedArgs);
+
+  // Update the callsites.
+  for (auto *U : make_early_inc_range(F->users())) {
+    assert(isa<CallBase>(U));
+    CallBase &CB = *cast<CallBase>(U);
+    assert(CB.getCalledFunction() == F && CB.getParent()->getParent() != F);
+    promoteCallsite(CB, NF, PromotedArgs, RetValuesStoreOrder);
+    CB.eraseFromParent();
+  }
+  return NF;
+}
+
+// This method checks the specified function to see if there are any
+// promotable arguments and if it is safe to promote the function (for
+// example, all callers are direct), and performs the promotion.
+static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM) {
+  if (F->hasOptNone())
+    return nullptr;
+
+  // Don't perform argument promotion for naked functions; otherwise we can
+  // end up removing parameters that are seemingly 'not used' as they are
+  // referred to in the assembly.
+  if (F->hasFnAttribute(Attribute::Naked))
+    return nullptr;
+
+  // Make sure that it is local to this module.
+  if (!F->hasLocalLinkage())
+    return nullptr;
+
+  // Don't promote arguments for variadic functions. Adding, removing, or
+  // changing non-pack parameters can change the classification of pack
+  // parameters. Frontends encode that classification at the call site in the
+  // IR, while in the callee the classification is determined dynamically
+  // based on the number of registers consumed so far.
+  if (F->isVarArg())
+    return nullptr;
+
+  // Don't transform functions that receive inallocas, as the transformation
+  // may not be safe depending on the calling convention.
+  if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))
+    return nullptr;
+
+  // See if there are any pointer arguments.
+  if (F->args().end() == find_if(F->args(), [](Argument &A) {
+        return A.getType()->isPointerTy();
+      }))
+    return nullptr;
+
+  LLVM_DEBUG(dbgs() << "Trying to promote arguments for " << F->getName()
+                    << '\n');
+
+  // If the function has attributes for the return value they most likely
+  // would not make sense for the aggregated return value, so we discard any
+  // in/out arguments. The same applies to the return attributes at the
+  // callsites.
+  bool InArgsOnly = F->getAttributes().getRetAttrs().hasAttributes();
+
+  for (Use &U : F->uses()) {
+    CallBase *CB = dyn_cast<CallBase>(U.getUser());
+    // Must be a direct call.
+    if (CB == nullptr || !CB->isCallee(&U)) // [1]
+      return nullptr;
+
+    // Can't change the signature of a musttail callee.
+    if (CB->isMustTailCall())
+      return nullptr;
+
+    if (!InArgsOnly && CB->getAttributes().getRetAttrs().hasAttributes())
+      InArgsOnly = true;
+  }
+
+  // Can't change the signature of a musttail caller.
+  for (BasicBlock &BB : *F)
+    if (BB.getTerminatingMustTailCall())
+      return nullptr;
+
+  SmallVector<ArgPromotionInfo, 8> Candidates;
+  for (Argument &A : F->args())
+    if (A.getType()->isPointerTy())
+      getPromotionCandidates(FAM, &A, Candidates, InArgsOnly);
+
+  if (Candidates.empty())
+    return nullptr;
+
+  { // Make sure the preloaded arguments are ABI compatible.
+    // TODO: Check individual arguments so we can promote a subset?
+    SmallVector<Type *, 8> Types;
+    for (auto &C : Candidates) {
+      if (C.Preload)
+        Types.push_back(C.ArgType);
+    }
+    if (!Types.empty()) {
+      const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(*F);
+      for (const Use &U : F->uses()) {
+        CallBase *CB = cast<CallBase>(U.getUser()); // Due to check [1].
+        if (!TTI.areTypesABICompatible(CB->getCaller(), F, Types))
+          return nullptr;
+      }
+    }
+  }
+
+  return ArgumentPromoter(F, FAM).run(Candidates);
+}
+
+PreservedAnalyses MSSAArgPromotionPass::run(LazyCallGraph::SCC &C,
+                                            CGSCCAnalysisManager &AM,
+                                            LazyCallGraph &CG,
+                                            CGSCCUpdateResult &UR) {
+  bool Changed = false, LocalChange;
+  do { // Iterate until we stop promoting from this SCC.
+    LocalChange = false;
+    for (LazyCallGraph::Node &N : C) {
+      Function &OldF = N.getFunction();
+      FunctionAnalysisManager &FAM =
+          AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+      if (Function *NewF = promoteArguments(&OldF, FAM)) {
+        // Directly substitute the functions in the call graph. Note that
+        // this requires the old function to be completely dead and
+        // completely replaced by the new function. It does no call graph
+        // updates, it merely swaps out the particular function mapped to a
+        // particular node in the graph.
+        C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
+        FAM.clear(OldF, OldF.getName());
+        OldF.eraseFromParent();
+        LocalChange = true;
+      }
+    }
+    Changed |= LocalChange;
+  } while (LocalChange);
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  return PreservedAnalyses::none(); // Since the function signature changed.
+}
+
+namespace {
+struct MSSAArgPromotion : public CallGraphSCCPass {
+  static char ID;
+
+  FunctionAnalysisManager FAM;
+
+  explicit MSSAArgPromotion() : CallGraphSCCPass(ID) {
+    initializeMSSAArgPromotionPass(*PassRegistry::getPassRegistry());
+    FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
+    FAM.registerPass([&] { return TargetIRAnalysis(); });
+    FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+    FAM.registerPass([&] { return AAManager(); });
+    FAM.registerPass([&] { return DominatorTreeAnalysis(); });
+    FAM.registerPass([&] { return MemorySSAAnalysis(); });
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    CallGraphSCCPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnSCC(CallGraphSCC &SCC) override;
+};
+} // end anonymous namespace
+
+char MSSAArgPromotion::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MSSAArgPromotion, "mssaargpromotion",
+                      "MSSA Promote 'by reference' arguments to scalars",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(MSSAArgPromotion, "mssaargpromotion",
+                    "MSSA Promote 'by reference' arguments to scalars", false,
+                    false)
+
+Pass *llvm::createMSSAArgPromotionPass() { return new MSSAArgPromotion(); }
+
+bool MSSAArgPromotion::runOnSCC(CallGraphSCC &SCC) {
+  if (skipSCC(SCC))
+    return false;
+
+  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+  bool Changed = false, LocalChange;
+  do {
+    LocalChange = false;
+    for (CallGraphNode *OldNode : SCC) {
+      Function *OldF = OldNode->getFunction();
+      if (!OldF)
+        continue;
+
+      // Clear the FAM but preserve immutable results.
+      FAM.invalidate(*OldF, PreservedAnalyses::none());
+
+      if (Function *NewF = promoteArguments(OldF, FAM)) {
+        LocalChange = true;
+
+        // Update the call graph for the newly promoted function.
+        CallGraphNode *NewNode = CG.getOrInsertFunction(NewF);
+        NewNode->stealCalledFunctionsFrom(OldNode);
+
+        // Update the call edges.
+        SmallDenseSet<CallGraphNode *, 8> ClearedNodes;
+        for (auto *U : make_early_inc_range(NewF->users())) {
+          assert(isa<CallBase>(U));
+          CallBase &CB = *cast<CallBase>(U);
+          CallGraphNode *CallerNode = CG[CB.getParent()->getParent()];
+          if (ClearedNodes.insert(CallerNode).second)
+            CallerNode->removeAnyCallEdgeTo(OldNode);
+          CallerNode->addCalledFunction(&CB, NewNode);
+        }
+        assert(OldNode->getNumReferences() == 0);
+        delete CG.removeFunctionFromModule(OldNode);
+        SCC.ReplaceNode(OldNode, NewNode);
+      }
+    }
+    Changed |= LocalChange;
+  } while (LocalChange);
+  return Changed;
+}
diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll
@@ -770,6 +770,7 @@
 ; GCN-O3-NEXT: OpenMP specific optimizations
 ; GCN-O3-NEXT: Deduce function attributes
 ; GCN-O3-NEXT: Promote 'by reference' arguments to scalars
+; GCN-O3-NEXT: MSSA Promote 'by reference' arguments to scalars
 ; GCN-O3-NEXT: FunctionPass Manager
 ; GCN-O3-NEXT: Dominator Tree Construction
 ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
diff --git a/llvm/test/Transforms/ArgumentPromotion/align.ll b/llvm/test/Transforms/ArgumentPromotion/align.ll
--- a/llvm/test/Transforms/ArgumentPromotion/align.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/align.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
 ; RUN: opt -S -argpromotion < %s | FileCheck %s
+; RUN: opt -S -mssaargpromotion < %s | FileCheck %s

 define internal i32 @callee_must_exec(i32* %p) {
 ; CHECK-LABEL: define {{[^@]+}}@callee_must_exec
diff --git a/llvm/test/Transforms/ArgumentPromotion/inoutargs.ll b/llvm/test/Transforms/ArgumentPromotion/inoutargs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/inoutargs.ll
@@ -0,0 +1,1103 @@
+; RUN: opt < %s -passes=mssaargpromotion -S | FileCheck %s
+
+;------- chain of calls
+
+; CHECK-LABEL: define internal i32 @inner_fx(i32 %P.0.val) {
+; CHECK-NEXT: %V = add i32 %P.0.val, 1
+; CHECK-NEXT: ret i32 %V
+define internal void @inner_fx(i32* %P) {
+  %L = load i32, i32* %P
+  %V = add i32 %L, 1
+  store i32 %V, i32* %P
+  ret void
+}
+
+; CHECK-LABEL: define internal i32 @outer_fx(i32 %P.0.val) {
+; CHECK-NEXT: %V1 = add i32 %P.0.val, 2
+; CHECK-NEXT: %1 = call i32 @inner_fx(i32 %V1)
+; CHECK-NEXT: %V2 = add i32 %1, 3
+; CHECK-NEXT: ret i32 %V2
+define internal void @outer_fx(i32* %P) {
+  %L1 = load i32, i32* %P
+  %V1 = add i32 %L1, 2
+  store i32 %V1, i32* %P
+  call void @inner_fx(i32* %P)
+  %L2 = load i32, i32* %P
+  %V2 = add i32 %L2, 3
+  store i32 %V2, i32* %P
+  ret void
+}
+
+; CHECK-LABEL: define void @test_chain_of_calls(i32* %P) {
+; CHECK-NEXT: %P.val = load i32, i32* %P, align 4
+; CHECK-NEXT: %1 = call i32 @outer_fx(i32 %P.val)
+; CHECK-NEXT: store i32 %1, i32* %P, align 4
+define void @test_chain_of_calls(i32* %P) {
+  call void @outer_fx(i32* %P)
+  ret void
+}
+
+;-------
+
+; CHECK-LABEL: define internal { i32, i32 } @test_not_all_path_store(i1 %c, i32 %P.0.val) {
+; CHECK-NEXT: br i1 %c, label %exit1, label %exit2
+; CHECK-LABEL: exit1:
+; CHECK-NEXT: %test_not_all_path_store.exit1.ret = insertvalue { i32, i32 } undef, i32 1, 0
+; CHECK-NEXT: %test_not_all_path_store.exit1.ret1 = insertvalue { i32, i32 } %test_not_all_path_store.exit1.ret, i32 42, 1
+; CHECK-NEXT: ret { i32, i32 } %test_not_all_path_store.exit1.ret1
+; CHECK-LABEL: exit2:
+; CHECK-NEXT: %test_not_all_path_store.exit2.ret = insertvalue { i32, i32 } undef, i32 2, 0
+; CHECK-NEXT: %test_not_all_path_store.exit2.ret1 = insertvalue { i32, i32 } %test_not_all_path_store.exit2.ret, i32 %P.0.val, 1
+; CHECK-NEXT: ret { i32, i32 } %test_not_all_path_store.exit2.ret1
+define internal i32 @test_not_all_path_store(i1 %c, i32* %P) {
+  br i1 %c, label %exit1, label %exit2
+
+exit1:
+  store i32 42, i32* %P
+  ret i32 1
+
+exit2:
+  ret i32 2
+}
+
+; CHECK-LABEL: define i32 @test_not_all_path_store_caller(i1 %c) {
+; CHECK-NEXT: %M = alloca i32, align 4
+; CHECK-NEXT: %M.val = load i32, i32* %M, align 4
+; CHECK-NEXT: %R = call { i32, i32 } @test_not_all_path_store(i1 %c, i32 %M.val)
+; CHECK-NEXT: %R.ret = extractvalue { i32, i32 } %R, 0
+; CHECK-NEXT: %M.val.ret = extractvalue { i32, i32 } %R, 1
+; CHECK-NEXT: store i32 %M.val.ret, i32* %M, align 4
+; CHECK-NEXT: %V = load i32, i32* %M, align 4
+; CHECK-NEXT: %Sum = add i32 %R.ret, %V
+; CHECK-NEXT: ret i32 %Sum
+define i32 @test_not_all_path_store_caller(i1 %c) {
+  %M = alloca i32
+  %R = call i32 @test_not_all_path_store(i1 %c, i32* %M)
+  %V = load i32, i32* %M
+  %Sum = add i32 %R, %V
+  ret i32 %Sum
+}
+
+;------- test that the clobber of the L2 load by the P1 store is detected
+
+; CHECK-LABEL: define internal void @test_getInOutArgClobber_visited
+define internal void @test_getInOutArgClobber_visited(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2
+  br i1 %c, label %left, label %right
+
+; CHECK-LABEL: left:
+; CHECK-NEXT: store
+left:
+  store i32 1, i32* %P1 ; clobbers the L2 load
+  br i1 %c, label %exit1, label %exit2
+
+right:
+  br i1 %c, label %exit1, label %exit2
+
+exit1:
+  %L1 = load i32, i32* %P1
+  ret void
+
+exit2:
+  %L2 = load i32, i32* %P2
+  ret void
+}
+
+define void @test_getInOutArgClobber_visited_caller(i1 %c, i32* %P2) {
+  %M = alloca i32
+  call void @test_getInOutArgClobber_visited(i1 %c, i32* %M, i32* %P2)
+  ret void
+}
P1 == P2 != P3) + store i32 1, i32* %P3 + %V2 = load i32, i32* %P2 + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P3 store + ret void +} + +define void @test_store_clobber1_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_clobber1(i32* %P1, i32* %P2, i32* %P3) + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_store_no_clobber1 +define internal void @test_store_no_clobber1(i32* %P1, i32* %P2) { ; P1 may alias P2 + store i32 42, i32* %P1 + store i32 1, i32* %P2 + %V2 = load i32, i32* %P2 + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +;CHECK-LABEL: define void @test_store_no_clobber1_caller +define void @test_store_no_clobber1_caller(i32* %P1, i32* %P2) { + call void @test_store_no_clobber1(i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_store_no_clobber1 +; store P2 and then P1 to preserve order in @test_store_no_clobber1 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_store_diamond_clobber1 +define internal void @test_store_diamond_clobber1(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 + br i1 %c, label %st1, label %st2 +;CHECK-LABEL: st1: +;CHECK-NEXT: br +st1: + store i32 42, i32* %P1 + br label %exit + +st2: + br label %exit + +exit: + store i32 1, i32* %P2 + %V2 = load i32, i32* %P2 + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +;CHECK-LABEL: define void @test_store_diamond_clobber1_caller +define void @test_store_diamond_clobber1_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_store_diamond_clobber1(i1 %c, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_store_diamond_clobber1 +; store P2 and then P1 to preserve order in @test_store_diamond_clobber1 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_store_diamond_clobber2 +define internal void @test_store_diamond_clobber2(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-NEXT: br + store i32 42, i32* %P1 + store i32 1, i32* %P2 + %V2 = load i32, i32* %P2 + br i1 %c, label %st1, label %st2 + +st1: + br label %exit + +st2: + br label %exit + +exit: + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +;CHECK-LABEL: define void @test_store_diamond_clobber2_caller +define void @test_store_diamond_clobber2_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_store_diamond_clobber2(i1 %c, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_store_diamond_clobber2 +; store P2 and then P1 to preserve order in @test_store_diamond_clobber2 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_store_diamond_clobber3 +define internal void @test_store_diamond_clobber3(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-NEXT: br + store i32 42, i32* %P1 + store i32 1, i32* %P2 + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: br +st1: + %V2 = load i32, i32* %P2 + br label %exit + +st2: + br label %exit + +exit: + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +;CHECK-LABEL: define void @test_store_diamond_clobber3_caller +define void @test_store_diamond_clobber3_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_store_diamond_clobber3(i1 %c, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } 
@test_store_diamond_clobber3 +; store P2 and then P1 to preserve order in @test_store_diamond_clobber3 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + + +;CHECK-LABEL: define internal i32 @test_store_diamond_clobber4 +define internal void @test_store_diamond_clobber4(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-NEXT: br + store i32 42, i32* %P1 + br i1 %c, label %st1, label %st2 + +st1: + store i32 1, i32* %P2 + %V2 = load i32, i32* %P2 + br label %exit + +st2: + br label %exit + +exit: + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +define void @test_store_diamond_clobber4_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_store_diamond_clobber4(i1 %c, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal void @test_store_diamond_clobber5 +define internal void @test_store_diamond_clobber5(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-NEXT: store + store i32 42, i32* %P1 ; clobbers V2 load on st2 path + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: store +st1: + store i32 1, i32* %P2 + br label %exit + +st2: + br label %exit + +exit: + %V2 = load i32, i32* %P2 + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +define void @test_store_diamond_clobber5_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_store_diamond_clobber5(i1 %c, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal i32 @test_store_diamond_clobber6 +define internal void @test_store_diamond_clobber6(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-NEXT: br + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: store i32 1, i32* %P2 +st1: + store i32 42, i32* %P1 + store i32 1, i32* %P2 + br label %exit + +st2: + br label %exit + +exit: + %V2 = load i32, i32* %P2 + store i32 43, i32* %P1 ; this store makes sure that P1 pointee isn't clobbered by P2 store + ret void +} + +define void @test_store_diamond_clobber6_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_store_diamond_clobber6(i1 %c, i32* %P1, i32* %P2) + ret void +} + + +;------- check clobbering in diamond + +;CHECK-LABEL: define internal void @test_clobber1 +define internal void @test_clobber1(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-NEXT: store + store i32 42, i32* %P2 ; clobbered by P1 stores, cannot promote + %V1 = load i32, i32* %P1 ; clobbered by P2 store above, cannot promote + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: store +st1: + store i32 1, i32* %P1 + br label %exit + +;CHECK-LABEL: st2: +;CHECK-NEXT: store +st2: + store i32 2, i32* %P1 + br label %exit + +;CHECK-LABEL: exit: +;CHECK-NEXT: load +exit: + %V2 = load i32, i32* %P1 + ret void +} + +define void @test_clobber1_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_clobber1(i1 %c, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_clobber2 +define internal void @test_clobber2(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 + %V1 = load i32, i32* %P1 ; no clobber + store i32 42, i32* %P2 ; clobbered by P1 stores, but unclobbered by P1 promotion + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: br +st1: + store i32 1, i32* %P1 + br label %exit + +;CHECK-LABEL: st2: +;CHECK-NEXT: br +st2: + store i32 2, i32* %P1 + br label %exit + +exit: + %V2 = load i32, i32* %P1 ; no clobber as every path writes by P1 + ret void +} + +;CHECK-LABEL: define void @test_clobber2_caller 
+define void @test_clobber2_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_clobber2(i1 %c, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_clobber2 +; store P2 and then P1 to preserve order in @test_clobber2 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + + +;CHECK-LABEL: define internal i32 @test_clobber3 +define internal void @test_clobber3(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 + %V1 = load i32, i32* %P1 ; no clobber + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: store i32 42, i32* %P2 +;CHECK-NEXT: br +st1: + ; P2 isn't selected for promotion: not all paths have stores and + ; not a valid thread-local ptr + store i32 42, i32* %P2 + store i32 1, i32* %P1 + br label %exit + +;CHECK-LABEL: st2: +;CHECK-NEXT: br +st2: + store i32 2, i32* %P1 + br label %exit + +exit: + %V2 = load i32, i32* %P1 ; no clobber as every path writes by P1 + ret void +} + +;CHECK-LABEL: define void @test_clobber3_caller +define void @test_clobber3_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_clobber3(i1 %c, i32* %P1, i32* %P2) +;CHECK: %1 = call i32 @test_clobber3 +;CHECK: store i32 %1, i32* %P1 + ret void +} + + +;CHECK-LABEL: define internal void @test_clobber4 +define internal void @test_clobber4(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 + %V1 = load i32, i32* %P1 ; no clobber + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: store i32 1, i32* %P1 +;CHECK-NEXT: store i32 42, i32* %P2 +;CHECK-NEXT: br +st1: + store i32 1, i32* %P1 + ; P2 isn't selected for promotion: not all paths have stores and + ; not a valid thread-local ptr + store i32 42, i32* %P2 + br label %exit + +;CHECK-LABEL: st2: +;CHECK-NEXT: store i32 2, i32* %P1 +st2: + store i32 2, i32* %P1 + br label %exit + +exit: + %V2 = load i32, i32* %P1 ; clobbered by P2 write + ret void +} + +define void @test_clobber4_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_clobber4(i1 %c, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal void @test_clobber5 +define internal void @test_clobber5(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 + %V1 = load i32, i32* %P1 ; no clobber + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: store +st1: + store i32 1, i32* %P1 + br label %exit + +;CHECK-LABEL: st2: +;CHECK-NEXT: store +st2: + store i32 2, i32* %P1 + br label %exit + +exit: + store i32 42, i32* %P2 ; clobbers V2 load + %V2 = load i32, i32* %P1 ; clobbered by P2 write + ret void +} + +define void @test_clobber5_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_clobber5(i1 %c, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_clobber6 +define internal void @test_clobber6(i1 %c, i32* %P1, i32* %P2) { ; P1 may alias P2 + %V1 = load i32, i32* %P1 ; no clobber + br i1 %c, label %st1, label %st2 + +;CHECK-LABEL: st1: +;CHECK-NEXT: br +st1: + store i32 1, i32* %P1 + br label %exit + +;CHECK-LABEL: st2: +;CHECK-NEXT: br +st2: + store i32 2, i32* %P1 + br label %exit + +;CHECK-LABEL: exit: +;CHECK-NOT: load +exit: + %V2 = load i32, i32* %P1 + ; P1 pointee is clobbered by P2 write, but unclobbered after P2 promotion + store i32 42, i32* %P2 + ret void +} + +;CHECK-LABEL: define void @test_clobber6_caller +define void @test_clobber6_caller(i1 %c, i32* %P1, i32* %P2) { + call void @test_clobber6(i1 %c, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_clobber6 +;CHECK: store i32 %P1 +;CHECK: store i32 %P2 + ret void +} + +;------- check clobbering in loops + +;CHECK-LABEL: define
internal void @test_loop_clobber1 +define internal void @test_loop_clobber1(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: store i32 42, i32* %P2 +;CHECK-NEXT: %V1 = load i32, i32* %P1 +entry: + store i32 42, i32* %P2 ; clobbers V1 load + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 ; clobbers P2 store + br label %loop_header + +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +loop: + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 + ret void +} + +define void @test_loop_clobber1_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber1(i32 %n, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_loop_clobber2 +define internal void @test_loop_clobber2(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: br +entry: + %V1 = load i32, i32* %P1 + store i32 42, i32* %P2 ; clobbered by P1 store, but then unclobbered + store i32 1, i32* %P1 + br label %loop_header + +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +;CHECK-LABEL: loop: +;CHECK-NEXT: %i.next = sub i32 %i, 1 +loop: + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 + ret void +} + +;CHECK-LABEL: define void @test_loop_clobber2_caller +define void @test_loop_clobber2_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber2(i32 %n, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_loop_clobber2 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + + +;CHECK-LABEL: define internal void @test_loop_clobber3 +define internal void @test_loop_clobber3(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: %V1 = load i32, i32* %P1 +;CHECK-NEXT: store i32 1, i32* %P1 +;CHECK-NEXT: store i32 42, i32* %P2 +entry: + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 + store i32 42, i32* %P2 ; clobbered by P1 store in loop BB + br label %loop_header + +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +loop: + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 ; clobbered by P2 store + ret void +} + +define void @test_loop_clobber3_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber3(i32 %n, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal void @test_loop_clobber4 +define internal void @test_loop_clobber4(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: %V1 = load i32, i32* %P1 +;CHECK-NEXT: store i32 1, i32* %P1 +entry: + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 + br label %loop_header + +;CHECK-LABEL: loop_header: +;CHECK: store i32 42, i32* %P2 +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + store i32 42, i32* %P2 ; clobbered by P1 store in loop BB + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +loop: + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 ; clobbered by P2 store + ret void +} + +define void @test_loop_clobber4_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber4(i32 %n, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal i32 @test_loop_clobber5 +define internal void 
@test_loop_clobber5(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: br +entry: + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 + br label %loop_header + +;CHECK-LABEL: loop_header: +;CHECK-NEXT: %P1.0.val.loop_header.phi = phi i32 [ 2, %loop ], [ 1, %entry ] +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +;CHECK-LABEL: loop: +;CHECK-NEXT: store i32 42, i32* %P2 +;CHECK-NEXT: %i.next = sub i32 %i, 1 +loop: + store i32 42, i32* %P2 ; not selected for promotion (not stored on every path) + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 + ret void +} + +;CHECK-LABEL: define void @test_loop_clobber5_caller +define void @test_loop_clobber5_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber5(i32 %n, i32* %P1, i32* %P2) +;CHECK: %1 = call i32 @test_loop_clobber5 +;CHECK: store i32 %1, i32* %P1 + ret void +} + + +;CHECK-LABEL: define internal void @test_loop_clobber6 +define internal void @test_loop_clobber6(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: %V1 = load i32, i32* %P1 +;CHECK-NEXT: store i32 1, i32* %P1 +entry: + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 + br label %loop_header + +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +;CHECK-LABEL: loop: +;CHECK-NEXT: store i32 2, i32* %P1 +;CHECK-NEXT: store i32 42, i32* %P2 +loop: + store i32 2, i32* %P1 + store i32 42, i32* %P2 ; not selected for promotion (not stored on every path) + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 ; clobbered by P2 store + ret void +} + +define void @test_loop_clobber6_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber6(i32 %n, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal void @test_loop_clobber7 +define internal void @test_loop_clobber7(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: %V1 = load i32, i32* %P1 +;CHECK-NEXT: store i32 1, i32* %P1 +entry: + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 + br label %loop_header + +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +loop: + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +;CHECK-LABEL: exit: +;CHECK-NEXT: store i32 42, i32* %P2 +exit: + store i32 42, i32* %P2 ; clobbers V2 load + %V2 = load i32, i32* %P1 ; clobbered by P2 store + ret void +} + +define void @test_loop_clobber7_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber7(i32 %n, i32* %P1, i32* %P2) + ret void +} + + +;CHECK-LABEL: define internal { i32, i32 } @test_loop_clobber8 +define internal void @test_loop_clobber8(i32 %n, i32* %P1, i32* %P2) { ; P1 may alias P2 +;CHECK-LABEL: entry: +;CHECK-NEXT: br +entry: + %V1 = load i32, i32* %P1 + store i32 1, i32* %P1 + br label %loop_header + +loop_header: + %i = phi i32 [%i.next, %loop], [%n, %entry] + %c = icmp eq i32 %i, 0 + br i1 %c, label %exit, label %loop + +;CHECK-LABEL: loop: +;CHECK-NEXT: %i.next = sub i32 %i, 1 +loop: + store i32 2, i32* %P1 + %i.next = sub i32 %i, 1 + br label %loop_header + +exit: + %V2 = load i32, i32* %P1 + store i32 42, i32* %P2 ; clobbers P1 pointee but it is unclobbered after P2 promotion + ret void +} + +;CHECK-LABEL: define void @test_loop_clobber8_caller +define void
@test_loop_clobber8_caller(i32 %n, i32* %P1, i32* %P2) { + call void @test_loop_clobber8(i32 %n, i32* %P1, i32* %P2) +;CHECK: %1 = call { i32, i32 } @test_loop_clobber8 +;CHECK: store i32 %P1 +;CHECK: store i32 %P2 + ret void +} + +; ----------------------------------------------------------------------------- +; Test declobbering sequences + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber1 +define internal void @test_store_unclobber1(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 1, i32* %P1 + store i32 2, i32* %P2 + store i32 3, i32* %P3 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 1, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } [[R]].ret0, i32 2, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 3, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber1_caller +define void @test_store_unclobber1_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber1(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } @test_store_unclobber1 +;CHECK: store i32 %P1 +;CHECK: store i32 %P2 +;CHECK: store i32 %P3 + ret void +} + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber2 +define internal void @test_store_unclobber2(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 1, i32* %P1 + store i32 3, i32* %P3 + store i32 2, i32* %P2 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 1, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } [[R]].ret0, i32 2, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 3, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber2_caller +define void @test_store_unclobber2_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber2(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } @test_store_unclobber2 +;CHECK: store i32 %P1 +;CHECK: store i32 %P3 +;CHECK: store i32 %P2 + ret void +} + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber3 +define internal void @test_store_unclobber3(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 2, i32* %P2 + store i32 1, i32* %P1 + store i32 3, i32* %P3 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 1, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } [[R]].ret0, i32 2, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 3, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber3_caller +define void @test_store_unclobber3_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber3(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } @test_store_unclobber3 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 +;CHECK: store i32 %P3 + ret void +} + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber4 +define internal void @test_store_unclobber4(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 2, i32* %P2 + store i32 3, i32* %P3 + store i32 1, i32* %P1 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 1, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } 
[[R]].ret0, i32 2, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 3, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber4_caller +define void @test_store_unclobber4_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber4(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } @test_store_unclobber4 +;CHECK: store i32 %P2 +;CHECK: store i32 %P3 +;CHECK: store i32 %P1 + ret void +} + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber5 +define internal void @test_store_unclobber5(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 3, i32* %P3 + store i32 1, i32* %P1 + store i32 2, i32* %P2 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 1, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } [[R]].ret0, i32 2, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 3, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber5_caller +define void @test_store_unclobber5_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber5(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } @test_store_unclobber5 +;CHECK: store i32 %P3 +;CHECK: store i32 %P1 +;CHECK: store i32 %P2 + ret void +} + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber6 +define internal void @test_store_unclobber6(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 3, i32* %P3 + store i32 2, i32* %P2 + store i32 1, i32* %P1 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 1, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } [[R]].ret0, i32 2, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 3, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber6_caller +define void @test_store_unclobber6_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber6(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } @test_store_unclobber6 +;CHECK: store i32 %P3 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + +;CHECK-LABEL: define internal { i32, i32, i32 } @test_store_unclobber6_2x +define internal void @test_store_unclobber6_2x(i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 3, i32* %P3 + store i32 2, i32* %P2 + store i32 1, i32* %P1 + + store i32 5, i32* %P3 + store i32 6, i32* %P2 + store i32 4, i32* %P1 +; note that values are inserted in the order of arguments of the function +;CHECK: [[R:%[a-zA-Z0-9_]+]].ret0 = insertvalue { i32, i32, i32 } undef, i32 4, 0 +;CHECK-DAG: [[R]].ret1 = insertvalue { i32, i32, i32 } [[R]].ret0, i32 6, 1 +;CHECK-DAG: [[R]].ret2 = insertvalue { i32, i32, i32 } [[R]].ret1, i32 5, 2 + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber6_2x_caller +define void @test_store_unclobber6_2x_caller(i32* %P1, i32* %P2, i32* %P3) { + call void @test_store_unclobber6_2x(i32* %P1, i32* %P2, i32* %P3) +;CHECK: %1 = call { i32, i32, i32 } +;CHECK: store i32 %P3 +;CHECK: store i32 %P2 +;CHECK: store i32 %P1 + ret void +} + +;CHECK-LABEL: define internal void @test_store_unclobber_fail1 +define internal void @test_store_unclobber_fail1(i1 %c, i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 3, i32* %P1 + br i1 %c, label %st1, label %st2 +st1: + store i32 1, i32* %P2 + store i32 2, i32* %P3 + 
br label %exit +st2: + store i32 1, i32* %P3 + store i32 2, i32* %P2 + br label %exit +exit: + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber_fail1_caller +define void @test_store_unclobber_fail1_caller(i1 %c, i32* %P1, i32* %P2, i32* %P3) { +; CHECK: call void + call void @test_store_unclobber_fail1(i1 %c, i32* %P1, i32* %P2, i32* %P3) + ret void +} + + +;CHECK-LABEL: define internal void @test_store_unclobber_fail2 +define internal void @test_store_unclobber_fail2(i1 %c, i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 1, i32* %P2 + br i1 %c, label %st1, label %st2 +st1: + store i32 3, i32* %P1 + store i32 2, i32* %P3 + br label %exit +st2: + store i32 1, i32* %P3 + store i32 3, i32* %P1 + br label %exit +exit: + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber_fail2_caller +define void @test_store_unclobber_fail2_caller(i1 %c, i32* %P1, i32* %P2, i32* %P3) { +; CHECK: call void + call void @test_store_unclobber_fail2(i1 %c, i32* %P1, i32* %P2, i32* %P3) + ret void +} + + +;CHECK-LABEL: define internal void @test_store_unclobber_fail3 +define internal void @test_store_unclobber_fail3(i1 %c, i32* %P1, i32* %P2, i32* %P3) { ; P1, P2, P3 may alias + store i32 2, i32* %P3 + br i1 %c, label %st1, label %st2 +st1: + store i32 3, i32* %P1 + store i32 1, i32* %P2 + br label %exit +st2: + store i32 1, i32* %P2 + store i32 3, i32* %P1 + br label %exit +exit: + ret void +} + +;CHECK-LABEL: define void @test_store_unclobber_fail3_caller +define void @test_store_unclobber_fail3_caller(i1 %c, i32* %P1, i32* %P2, i32* %P3) { +; CHECK: call void + call void @test_store_unclobber_fail3(i1 %c, i32* %P1, i32* %P2, i32* %P3) + ret void +} + +; ----------------------------------------------------------------------------- +; Test declobbering in a more complicated CFG +;CHECK-LABEL: define internal { i32, i32, i32 } @nested_diamond +define internal i32 @nested_diamond(i1 %D1C, i1 %D2C, i32 %X, i32 %Y, i32 *%P1, i32* %P2) { +; D1 +; / \ +; D2 \ +; D2L D2R D1R +; D2E / +; \ / +; D1E +D1: + br i1 %D1C, label %D2, label %D1R + +D2: + br i1 %D2C, label %D2L, label %D2R + +D2L: +;CHECK-LABEL: D2L: +;CHECK-NEXT: br + store i32 %Y, i32* %P1 + store i32 %X, i32* %P1 + store i32 %X, i32* %P2 + store i32 %Y, i32* %P2 + br label %D2E + +D2R: +;CHECK-LABEL: D2R: +;CHECK-NEXT: br + store i32 %Y, i32* %P1 + store i32 %X, i32* %P2 + br label %D2E + +D2E: +;CHECK-LABEL: D2E: +;CHECK-NEXT: %P1.0.val.D2E.phi = phi i32 [ %Y, %D2R ], [ %X, %D2L ] +;CHECK-NEXT: %P2.0.val.D2E.phi = phi i32 [ %X, %D2R ], [ %Y, %D2L ] + br label %D1E + +D1R: +;CHECK-LABEL: D1R: +;CHECK-NEXT: br + store i32 %X, i32* %P1 + store i32 %Y, i32* %P2 + br label %D1E + +D1E: +;CHECK-LABEL: D1E: +;CHECK-NEXT: %P1.0.val.D1E.phi = phi i32 [ %X, %D1R ], [ %P1.0.val.D2E.phi, %D2E ] +;CHECK-NEXT: %P2.0.val.D1E.phi = phi i32 [ %Y, %D1R ], [ %P2.0.val.D2E.phi, %D2E ] +;CHECK-NEXT: [[R1:%.*]] = insertvalue { i32, i32, i32 } undef, i32 42, 0 +;CHECK-NEXT: [[R2:%.*]] = insertvalue { i32, i32, i32 } [[R1]], i32 %P1.0.val.D1E.phi, 1 +;CHECK-NEXT: [[R3:%.*]] = insertvalue { i32, i32, i32 } [[R2]], i32 %P2.0.val.D1E.phi, 2 +;CHECK-NEXT: ret { i32, i32, i32 } [[R3]] + ret i32 42 +} + +;CHECK-LABEL: define i32 @nested_diamond_caller +define i32 @nested_diamond_caller(i1 %D1C, i1 %D2C, i32 %X, i32 %Y, i32* %P1, i32* %P2) { + %C = call i32 @nested_diamond(i1 %D1C, i1 %D2C, i32 %X, i32 %Y, i32* %P1, i32* %P2) +; CHECK: %C = call { i32, i32, i32 } @nested_diamond(i1 %D1C, i1 %D2C, i32 %X, i32 %Y) +; CHECK-NEXT: %P1.val.ret = 
extractvalue { i32, i32, i32 } %C, 1 +; CHECK-NEXT: store i32 %P1.val.ret, i32* %P1, align 4 +; CHECK-NEXT: %P2.val.ret = extractvalue { i32, i32, i32 } %C, 2 +; CHECK-NEXT: store i32 %P2.val.ret, i32* %P2, align 4 + %V1 = load i32, i32* %P1 + %V2 = load i32, i32* %P2 + %Sum = add i32 %V1, %V2 + ret i32 %Sum +} diff --git a/llvm/test/Transforms/ArgumentPromotion/inoutargs2.ll b/llvm/test/Transforms/ArgumentPromotion/inoutargs2.ll new file mode 100755 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/inoutargs2.ll @@ -0,0 +1,1779 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt < %s -mssaargpromotion -S | FileCheck %s + +; REF-LABEL: define internal void @no_ret_blocks() #0 { +; REF-NEXT: unreachable +define internal void @no_ret_blocks() #0 { +; CHECK-LABEL: define {{[^@]+}}@no_ret_blocks +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: unreachable +; + unreachable +} + +define void @no_ret_blocks_caller() { +; CHECK-LABEL: define {{[^@]+}}@no_ret_blocks_caller() { +; CHECK-NEXT: call void @no_ret_blocks() +; CHECK-NEXT: ret void +; + call void @no_ret_blocks() + ret void +} + +; REF-LABEL: @void_one_out_arg_i32_no_use( +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_no_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_no_use +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret void +; + ret void +} + +define void @void_one_out_arg_i32_no_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_no_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @void_one_out_arg_i32_no_use() +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_no_use(i32* %val) + ret void +} + +; REF-LABEL: define internal void @skip_byval_arg( +; REF-NEXT: store i32 0, i32* %val +; REF-NEXT: ret void +define internal void @skip_byval_arg(i32* byval(i32) %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@skip_byval_arg +; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + store i32 0, i32* %val + ret void +} + +define void @skip_byval_arg_caller(i32* byval(i32) %val) { +; CHECK-LABEL: define {{[^@]+}}@skip_byval_arg_caller +; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) { +; CHECK-NEXT: call void @skip_byval_arg(i32* byval(i32) [[VAL]]) +; CHECK-NEXT: ret void +; + call void @skip_byval_arg(i32* byval(i32) %val) + ret void +} + +; REF-LABEL: define internal void @skip_optnone( +; REF-NEXT: store i32 0, i32* %val +; REF-NEXT: ret void +define internal void @skip_optnone(i32* %val) #1 { +; CHECK-LABEL: define {{[^@]+}}@skip_optnone +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + store i32 0, i32* %val + ret void +} + +define void @skip_optnone_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@skip_optnone_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @skip_optnone(i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @skip_optnone(i32* %val) + ret void +} + +; REF-LABEL: define internal void @skip_volatile( +; REF-NEXT: store volatile i32 0, i32* %val +; REF-NEXT: ret void +define internal void @skip_volatile(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@skip_volatile +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store volatile i32 0, i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + store volatile i32 0, i32* %val + ret void +} + 
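+; (A volatile access must be preserved exactly as written — it may not be +; removed or reordered — so @skip_volatile above is conservatively left +; unchanged and the caller below keeps passing the pointer.)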
+define void @skip_volatile_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@skip_volatile_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @skip_volatile(i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @skip_volatile(i32* %val) + ret void +} + +; REF-LABEL: define internal void @skip_atomic( +; REF-NEXT: store atomic i32 0, i32* %val +; REF-NEXT: ret void +define internal void @skip_atomic(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@skip_atomic +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store atomic i32 0, i32* [[VAL]] seq_cst, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 0, i32* %val seq_cst, align 4 + ret void +} + +define void @skip_atomic_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@skip_atomic_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @skip_atomic(i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @skip_atomic(i32* %val) + ret void +} + +; REF-LABEL: define internal void @skip_store_pointer_val( +; REF-NEXT: store i32* %val, i32** undef +; REF-NEXT: ret void +define internal void @skip_store_pointer_val(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@skip_store_pointer_val +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32* [[VAL]], i32** undef, align 8 +; CHECK-NEXT: ret void +; + store i32* %val, i32** undef + ret void +} + +define void @skip_store_pointer_val_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@skip_store_pointer_val_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @skip_store_pointer_val(i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @skip_store_pointer_val(i32* %val) + ret void +} + +; REF-LABEL: define internal void @skip_store_gep( +; REF-NEXT: %gep = getelementptr inbounds i32, i32* %val, i32 1 +; REF-NEXT: store i32 0, i32* %gep +; REF-NEXT: ret void +define internal void @skip_store_gep(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@skip_store_gep +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32* [[VAL]], i32 1 +; CHECK-NEXT: store i32 0, i32* [[GEP]], align 4 +; CHECK-NEXT: ret void +; + %gep = getelementptr inbounds i32, i32* %val, i32 1 + store i32 0, i32* %gep + ret void +} + +define void @skip_store_gep_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@skip_store_gep_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @skip_store_gep(i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @skip_store_gep(i32* %val) + ret void +} + +; *** TODO: is this valid handling for sret? 
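+; (Note on the TODO above: sret designates the hidden return-value slot in the +; ABI, so folding it into an aggregate return changes the externally visible +; signature; that is presumably why this handling is being questioned.)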
+; REF-LABEL: define internal void @skip_sret(i32* sret(i32) %sret, i32* %out) #0 { +; REF-NEXT: store +; REF-NEXT: store +; REF-NEXT: ret void +define internal void @skip_sret(i32* sret(i32) %sret, i32* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@skip_sret +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[SKIP_SRET_RET0:%.*]] = insertvalue { i32, i32 } undef, i32 1, 0 +; CHECK-NEXT: [[SKIP_SRET_RET1:%.*]] = insertvalue { i32, i32 } [[SKIP_SRET_RET0]], i32 0, 1 +; CHECK-NEXT: ret { i32, i32 } [[SKIP_SRET_RET1]] +; + store i32 1, i32* %sret + store i32 0, i32* %out + ret void +} + +define void @skip_sret_caller(i32* sret(i32) %sret, i32* %out) { +; CHECK-LABEL: define {{[^@]+}}@skip_sret_caller +; CHECK-SAME: (i32* sret(i32) [[SRET:%.*]], i32* [[OUT:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } @skip_sret() +; CHECK-NEXT: [[SRET_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +; CHECK-NEXT: store i32 [[SRET_VAL_RET]], i32* [[SRET]], align 4 +; CHECK-NEXT: [[OUT_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[OUT_VAL_RET]], i32* [[OUT]], align 4 +; CHECK-NEXT: ret void +; + call void @skip_sret(i32* sret(i32) %sret, i32* %out) + ret void +} + +; REF-LABEL: define private %void_one_out_arg_i32_1_use @void_one_out_arg_i32_1_use.body(i32* %val) #0 { +; REF-NEXT: ret %void_one_out_arg_i32_1_use zeroinitializer + +; REF-LABEL: @void_one_out_arg_i32_1_use( +; REF-NEXT: %2 = call %void_one_out_arg_i32_1_use @void_one_out_arg_i32_1_use.body(i32* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_i32_1_use %2, 0 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_1_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + store i32 0, i32* %val + ret void +} + +define void @void_one_out_arg_i32_1_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_1_use() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_1_use(i32* %val) + ret void +} + +; REF-LABEL: define private %void_one_out_arg_i32_1_use_align @void_one_out_arg_i32_1_use_align.body(i32* align 8 %val) #0 { +; REF-NEXT: ret %void_one_out_arg_i32_1_use_align zeroinitializer + +; REF-LABEL: @void_one_out_arg_i32_1_use_align( +; REF-NEXT: %2 = call %void_one_out_arg_i32_1_use_align @void_one_out_arg_i32_1_use_align.body(i32* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_i32_1_use_align %2, 0 +; REF-NEXT: store i32 %3, i32* %0, align 8 +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_1_use_align(i32* align 8 %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + store i32 0, i32* %val, align 8 + ret void +} + +define void @void_one_out_arg_i32_1_use_align_caller(i32* align 8 %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align_caller +; CHECK-SAME: (i32* align 8 [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_1_use_align() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 8 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_1_use_align(i32* align 8 %val) + ret void +} + +; REF-LABEL: define private %void_one_out_arg_i32_2_use @void_one_out_arg_i32_2_use.body(i1 %arg0, i32* %val) #0 { +; 
REF: br i1 %arg0, label %ret0, label %ret1 + +; REF: ret0: +; REF-NEXT: ret %void_one_out_arg_i32_2_use zeroinitializer + +; REF: ret1: +; REF-NEXT: ret %void_one_out_arg_i32_2_use { i32 9 } + +; REF-LABEL: define internal void @void_one_out_arg_i32_2_use(i1 %0, i32* %1) #2 { +; REF-NEXT: %3 = call %void_one_out_arg_i32_2_use @void_one_out_arg_i32_2_use.body(i1 %0, i32* undef) +; REF-NEXT: %4 = extractvalue %void_one_out_arg_i32_2_use %3, 0 +; REF-NEXT: store i32 %4, i32* %1, align 4 +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_2_use(i1 %arg0, i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use +; CHECK-SAME: (i1 [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]] +; CHECK: ret0: +; CHECK-NEXT: ret i32 0 +; CHECK: ret1: +; CHECK-NEXT: ret i32 9 +; + br i1 %arg0, label %ret0, label %ret1 + +ret0: + store i32 0, i32* %val + ret void + +ret1: + store i32 9, i32* %val + ret void +} + +define void @void_one_out_arg_i32_2_use_caller(i1 %arg0, i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use_caller +; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_2_use(i1 [[ARG0]]) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_2_use(i1 %arg0, i32* %val) + ret void +} + +declare void @may.clobber() + +; REF-LABEL: define private %void_one_out_arg_i32_2_stores @void_one_out_arg_i32_2_stores.body(i32* %val) #0 { +; REF-NEXT: store i32 0, i32* %val +; REF-NEXT: ret %void_one_out_arg_i32_2_stores { i32 1 } + +; REF-LABEL: define internal void @void_one_out_arg_i32_2_stores(i32* %0) #2 { +; REF-NEXT: %2 = call %void_one_out_arg_i32_2_stores @void_one_out_arg_i32_2_stores.body(i32* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_i32_2_stores %2, 0 +; REF-NEXT: store i32 %3, i32* %0, align 4 +define internal void @void_one_out_arg_i32_2_stores(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret i32 1 +; + store i32 0, i32* %val + store i32 1, i32* %val + ret void +} + +define void @void_one_out_arg_i32_2_stores_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_2_stores() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_2_stores(i32* %val) + ret void +} + +; REF-LABEL: define private %void_one_out_arg_i32_2_stores_clobber @void_one_out_arg_i32_2_stores_clobber.body(i32* %val) #0 { +; REF-NEXT: store i32 0, i32* %val +; REF-NEXT: call void @may.clobber() +; REF-NEXT: ret %void_one_out_arg_i32_2_stores_clobber { i32 1 } + +; REF-LABEL: define internal void @void_one_out_arg_i32_2_stores_clobber(i32* %0) #2 { +; REF-NEXT: %2 = call %void_one_out_arg_i32_2_stores_clobber @void_one_out_arg_i32_2_stores_clobber.body(i32* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_i32_2_stores_clobber %2, 0 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_2_stores_clobber(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @may.clobber() +; CHECK-NEXT: ret i32 1 +; + store i32 0, i32* %val + call void @may.clobber() + store i32 1, i32* %val + ret void 
+} + +define void @void_one_out_arg_i32_2_stores_clobber_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_2_stores_clobber() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_2_stores_clobber(i32* %val) + ret void +} + + +; REF-LABEL: define internal void @void_one_out_arg_i32_call_may_clobber(i32* %val) #0 { +; REF-NEXT: store i32 0, i32* %val +; REF-NEXT: call void @may.clobber() +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_call_may_clobber(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_call_may_clobber +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 0, i32* [[VAL]], align 4 +; CHECK-NEXT: call void @may.clobber() +; CHECK-NEXT: ret void +; + store i32 0, i32* %val + call void @may.clobber() + ret void +} + +define void @void_one_out_arg_i32_call_may_clobber_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_call_may_clobber_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @void_one_out_arg_i32_call_may_clobber(i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_call_may_clobber(i32* %val) + ret void +} + +; REF-LABEL: define private %void_one_out_arg_i32_pre_call_may_clobber @void_one_out_arg_i32_pre_call_may_clobber.body(i32* %val) #0 { +; REF-NEXT: call void @may.clobber() +; REF-NEXT: ret %void_one_out_arg_i32_pre_call_may_clobber zeroinitializer + +; REF-LABEL: @void_one_out_arg_i32_pre_call_may_clobber(i32* %0) #2 { +; REF-NEXT: %2 = call %void_one_out_arg_i32_pre_call_may_clobber @void_one_out_arg_i32_pre_call_may_clobber.body(i32* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_i32_pre_call_may_clobber %2, 0 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_pre_call_may_clobber(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @may.clobber() +; CHECK-NEXT: ret i32 0 +; + call void @may.clobber() + store i32 0, i32* %val + ret void +} + +define void @void_one_out_arg_i32_pre_call_may_clobber_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_pre_call_may_clobber() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_pre_call_may_clobber(i32* %val) + ret void +} + +; REF-LABEL: define internal void @void_one_out_arg_i32_reload(i32* %val) #0 { +; REF: store i32 0, i32* %val +; REF: %load = load i32, i32* %val, align 4 +; REF: ret void +define internal void @void_one_out_arg_i32_reload(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_reload +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + store i32 0, i32* %val + %load = load i32, i32* %val, align 4 + ret void +} + +define void @void_one_out_arg_i32_reload_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_reload_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_reload() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_reload(i32* %val) + ret void 
+} + +; REF-LABEL: define internal void @void_one_out_arg_i32_store_in_different_block( +; REF-NEXT: %load = load i32, i32 addrspace(1)* undef +; REF-NEXT: store i32 0, i32* %out +; REF-NEXT: br label %ret +; REF: ret: +; REF-NEXT: ret void +define internal void @void_one_out_arg_i32_store_in_different_block(i32* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_store_in_different_block +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: br label [[RET:%.*]] +; CHECK: ret: +; CHECK-NEXT: ret i32 0 +; + %load = load i32, i32 addrspace(1)* undef + store i32 0, i32* %out + br label %ret + +ret: + ret void +} + +define void @void_one_out_arg_i32_store_in_different_block_caller(i32* %out) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_store_in_different_block_caller +; CHECK-SAME: (i32* [[OUT:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_arg_i32_store_in_different_block() +; CHECK-NEXT: store i32 [[TMP1]], i32* [[OUT]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_i32_store_in_different_block(i32* %out) + ret void +} + +; REF-LABEL: define internal void @unused_out_arg_one_branch( +; REF: ret0: +; REF-NEXT: ret void + +; REF: ret1: +; REF-NEXT: store i32 9, i32* %val +; REF-NEXT: ret void +define internal void @unused_out_arg_one_branch(i1 %arg0, i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@unused_out_arg_one_branch +; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]] +; CHECK: ret0: +; CHECK-NEXT: ret void +; CHECK: ret1: +; CHECK-NEXT: store i32 9, i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + br i1 %arg0, label %ret0, label %ret1 + +ret0: + ret void + +ret1: + store i32 9, i32* %val + ret void +} + +define void @unused_out_arg_one_branch_caller(i1 %arg0, i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@unused_out_arg_one_branch_caller +; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) { +; CHECK-NEXT: call void @unused_out_arg_one_branch(i1 [[ARG0]], i32* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @unused_out_arg_one_branch(i1 %arg0, i32* %val) + ret void +} + +; REF-LABEL: define private %void_one_out_arg_v2i32_1_use @void_one_out_arg_v2i32_1_use.body(<2 x i32>* %val) #0 { +; REF-NEXT: ret %void_one_out_arg_v2i32_1_use { <2 x i32> <i32 17, i32 9> } + +; REF-LABEL: define internal void @void_one_out_arg_v2i32_1_use(<2 x i32>* %0) #2 { +; REF-NEXT: %2 = call %void_one_out_arg_v2i32_1_use @void_one_out_arg_v2i32_1_use.body(<2 x i32>* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_v2i32_1_use %2, 0 +; REF-NEXT: store <2 x i32> %3, <2 x i32>* %0, align 8 +; REF-NEXT: ret void +define internal void @void_one_out_arg_v2i32_1_use(<2 x i32>* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret <2 x i32> <i32 17, i32 9> +; + store <2 x i32> <i32 17, i32 9>, <2 x i32>* %val + ret void +} + +define void @void_one_out_arg_v2i32_1_use_caller(<2 x i32>* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use_caller +; CHECK-SAME: (<2 x i32>* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @void_one_out_arg_v2i32_1_use() +; CHECK-NEXT: store <2 x i32> [[TMP1]], <2 x i32>* [[VAL]], align 8 +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_v2i32_1_use(<2 x i32>* %val) + ret void +} + +%struct = type { i32, i8, float } + +; *** TODO: this should be handled as well +; REF-LABEL: define private %void_one_out_arg_struct_1_use
@void_one_out_arg_struct_1_use.body(%struct* %out) #0 { +; REF-NEXT: ret %void_one_out_arg_struct_1_use { %struct { i32 9, i8 99, float 4.000000e+00 } } + +; Normally this is split into element accesses which we don't handle. +; REF-LABEL: define internal void @void_one_out_arg_struct_1_use(%struct* %0) #2 { +; REF-NEXT: %2 = call %void_one_out_arg_struct_1_use @void_one_out_arg_struct_1_use.body(%struct* undef) +; REF-NEXT: %3 = extractvalue %void_one_out_arg_struct_1_use %2, 0 +; REF-NEXT: store %struct %3, %struct* %0, align 4 +; REF-NEXT: ret void +define internal void @void_one_out_arg_struct_1_use(%struct* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use +; CHECK-SAME: (%struct* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store [[STRUCT:%.*]] { i32 9, i8 99, float 4.000000e+00 }, %struct* [[OUT]], align 4 +; CHECK-NEXT: ret void +; + store %struct { i32 9, i8 99, float 4.0 }, %struct* %out + ret void +} + +define void @void_one_out_arg_struct_1_use_caller(%struct* %out) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use_caller +; CHECK-SAME: (%struct* [[OUT:%.*]]) { +; CHECK-NEXT: call void @void_one_out_arg_struct_1_use(%struct* [[OUT]]) +; CHECK-NEXT: ret void +; + call void @void_one_out_arg_struct_1_use(%struct* %out) + ret void +} + +; REF-LABEL: define private %i32_one_out_arg_i32_1_use @i32_one_out_arg_i32_1_use.body(i32* %val) #0 { +; REF-NEXT: ret %i32_one_out_arg_i32_1_use { i32 9, i32 24 } + +; REF-LABEL: define internal i32 @i32_one_out_arg_i32_1_use(i32* %0) #2 { +; REF-NEXT: %2 = call %i32_one_out_arg_i32_1_use @i32_one_out_arg_i32_1_use.body(i32* undef) +; REF-NEXT: %3 = extractvalue %i32_one_out_arg_i32_1_use %2, 1 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: %4 = extractvalue %i32_one_out_arg_i32_1_use %2, 0 +; REF-NEXT: ret i32 %4 +define internal i32 @i32_one_out_arg_i32_1_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[I32_ONE_OUT_ARG_I32_1_USE_RET:%.*]] = insertvalue { i32, i32 } undef, i32 9, 0 +; CHECK-NEXT: [[I32_ONE_OUT_ARG_I32_1_USE_RET1:%.*]] = insertvalue { i32, i32 } [[I32_ONE_OUT_ARG_I32_1_USE_RET]], i32 24, 1 +; CHECK-NEXT: ret { i32, i32 } [[I32_ONE_OUT_ARG_I32_1_USE_RET1]] +; + store i32 24, i32* %val + ret i32 9 +} + +define i32 @i32_one_out_arg_i32_1_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call { i32, i32 } @i32_one_out_arg_i32_1_use() +; CHECK-NEXT: [[R_RET:%.*]] = extractvalue { i32, i32 } [[R]], 0 +; CHECK-NEXT: [[VAL_VAL_RET:%.*]] = extractvalue { i32, i32 } [[R]], 1 +; CHECK-NEXT: store i32 [[VAL_VAL_RET]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret i32 [[R_RET]] +; + %R = call i32 @i32_one_out_arg_i32_1_use(i32* %val) + ret i32 %R +} + +; REF-LABEL: define private %unused_different_type @unused_different_type.body(i32* %arg0, float* nocapture %arg1) #0 { +; REF-NEXT: ret %unused_different_type { float 4.000000e+00 } + +; REF-LABEL: define internal void @unused_different_type(i32* %0, float* nocapture %1) #2 { +; REF-NEXT: %3 = call %unused_different_type @unused_different_type.body(i32* %0, float* undef) +; REF-NEXT: %4 = extractvalue %unused_different_type %3, 0 +; REF-NEXT: store float %4, float* %1, align 4 +; REF-NEXT: ret void +define internal void @unused_different_type(i32* %arg0, float* nocapture %arg1) #0 { +; CHECK-LABEL: define {{[^@]+}}@unused_different_type +; CHECK-SAME: () #[[ATTR0]] { +; 
CHECK-NEXT: ret float 4.000000e+00 +; + store float 4.0, float* %arg1, align 4 + ret void +} + +define void @unused_different_type_caller(i32* %arg0, float* nocapture %arg1) { +; CHECK-LABEL: define {{[^@]+}}@unused_different_type_caller +; CHECK-SAME: (i32* [[ARG0:%.*]], float* nocapture [[ARG1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call float @unused_different_type() +; CHECK-NEXT: store float [[TMP1]], float* [[ARG1]], align 4 +; CHECK-NEXT: ret void +; + call void @unused_different_type(i32* %arg0, float* nocapture %arg1) + ret void +} + +; REF-LABEL: define private %multiple_same_return_noalias @multiple_same_return_noalias.body(i32* noalias %out0, i32* noalias %out1) #0 { +; REF-NEXT: ret %multiple_same_return_noalias { i32 1, i32 2 } + +; REF-LABEL: define internal void @multiple_same_return_noalias( +; REF-NEXT: %3 = call %multiple_same_return_noalias @multiple_same_return_noalias.body(i32* undef, i32* undef) +; REF-NEXT: %4 = extractvalue %multiple_same_return_noalias %3, 0 +; REF-NEXT: store i32 %4, i32* %0, align 4 +; REF-NEXT: %5 = extractvalue %multiple_same_return_noalias %3, 1 +; REF-NEXT: store i32 %5, i32* %1, align 4 +; REF-NEXT: ret void +define internal void @multiple_same_return_noalias(i32* noalias %out0, i32* noalias %out1) #0 { +; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[MULTIPLE_SAME_RETURN_NOALIAS_RET0:%.*]] = insertvalue { i32, i32 } undef, i32 1, 0 +; CHECK-NEXT: [[MULTIPLE_SAME_RETURN_NOALIAS_RET1:%.*]] = insertvalue { i32, i32 } [[MULTIPLE_SAME_RETURN_NOALIAS_RET0]], i32 2, 1 +; CHECK-NEXT: ret { i32, i32 } [[MULTIPLE_SAME_RETURN_NOALIAS_RET1]] +; + store i32 1, i32* %out0, align 4 + store i32 2, i32* %out1, align 4 + ret void +} + +define void @multiple_same_return_noalias_caller(i32* noalias %out0, i32* noalias %out1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias_caller +; CHECK-SAME: (i32* noalias [[OUT0:%.*]], i32* noalias [[OUT1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } @multiple_same_return_noalias() +; CHECK-NEXT: [[OUT1_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[OUT1_VAL_RET]], i32* [[OUT1]], align 4 +; CHECK-NEXT: [[OUT0_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +; CHECK-NEXT: store i32 [[OUT0_VAL_RET]], i32* [[OUT0]], align 4 +; CHECK-NEXT: ret void +; + call void @multiple_same_return_noalias(i32* noalias %out0, i32* noalias %out1) + ret void +} + +; REF-LABEL: define private %multiple_same_return_mayalias @multiple_same_return_mayalias.body(i32* %out0, i32* %out1) #0 { +; REF-NEXT: ret %multiple_same_return_mayalias { i32 2, i32 1 } + +; REF-LABEL: define internal void @multiple_same_return_mayalias(i32* %0, i32* %1) #2 { +; REF-NEXT: %3 = call %multiple_same_return_mayalias @multiple_same_return_mayalias.body(i32* undef, i32* undef) +; REF-NEXT: %4 = extractvalue %multiple_same_return_mayalias %3, 0 +; REF-NEXT: store i32 %4, i32* %0, align 4 +; REF-NEXT: %5 = extractvalue %multiple_same_return_mayalias %3, 1 +; REF-NEXT: store i32 %5, i32* %1, align 4 +; REF-NEXT: ret void +define internal void @multiple_same_return_mayalias(i32* %out0, i32* %out1) #0 { +; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[MULTIPLE_SAME_RETURN_MAYALIAS_RET0:%.*]] = insertvalue { i32, i32 } undef, i32 1, 0 +; CHECK-NEXT: [[MULTIPLE_SAME_RETURN_MAYALIAS_RET1:%.*]] = insertvalue { i32, i32 } [[MULTIPLE_SAME_RETURN_MAYALIAS_RET0]], i32 2, 1 +; CHECK-NEXT: ret { 
i32, i32 } [[MULTIPLE_SAME_RETURN_MAYALIAS_RET1]] +; + store i32 1, i32* %out0, align 4 + store i32 2, i32* %out1, align 4 + ret void +} + +define void @multiple_same_return_mayalias_caller(i32* %out0, i32* %out1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_caller +; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } @multiple_same_return_mayalias() +; CHECK-NEXT: [[OUT0_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +; CHECK-NEXT: store i32 [[OUT0_VAL_RET]], i32* [[OUT0]], align 4 +; CHECK-NEXT: [[OUT1_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[OUT1_VAL_RET]], i32* [[OUT1]], align 4 +; CHECK-NEXT: ret void +; + call void @multiple_same_return_mayalias(i32* %out0, i32* %out1) + ret void +} + +; REF-LABEL: define private %multiple_same_return_mayalias_order @multiple_same_return_mayalias_order.body(i32* %out0, i32* %out1) #0 { +; REF-NEXT: ret %multiple_same_return_mayalias_order { i32 1, i32 2 } + +; REF-LABEL: define internal void @multiple_same_return_mayalias_order(i32* %0, i32* %1) #2 { +; REF-NEXT: %3 = call %multiple_same_return_mayalias_order @multiple_same_return_mayalias_order.body(i32* undef, i32* undef) +; REF-NEXT: %4 = extractvalue %multiple_same_return_mayalias_order %3, 0 +; REF-NEXT: store i32 %4, i32* %0, align 4 +; REF-NEXT: %5 = extractvalue %multiple_same_return_mayalias_order %3, 1 +; REF-NEXT: store i32 %5, i32* %1, align 4 +; REF-NEXT: ret void +define internal void @multiple_same_return_mayalias_order(i32* %out0, i32* %out1) #0 { +; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER_RET0:%.*]] = insertvalue { i32, i32 } undef, i32 1, 0 +; CHECK-NEXT: [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER_RET1:%.*]] = insertvalue { i32, i32 } [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER_RET0]], i32 2, 1 +; CHECK-NEXT: ret { i32, i32 } [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER_RET1]] +; + store i32 2, i32* %out1, align 4 + store i32 1, i32* %out0, align 4 + ret void +} + +define void @multiple_same_return_mayalias_order_caller(i32* %out0, i32* %out1) { +; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order_caller +; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i32 } @multiple_same_return_mayalias_order() +; CHECK-NEXT: [[OUT1_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 1 +; CHECK-NEXT: store i32 [[OUT1_VAL_RET]], i32* [[OUT1]], align 4 +; CHECK-NEXT: [[OUT0_VAL_RET:%.*]] = extractvalue { i32, i32 } [[TMP1]], 0 +; CHECK-NEXT: store i32 [[OUT0_VAL_RET]], i32* [[OUT0]], align 4 +; CHECK-NEXT: ret void +; + call void @multiple_same_return_mayalias_order(i32* %out0, i32* %out1) + ret void +} + +; The reference pass fails to convert this case because it only looks for the +; store in the same block as the return; the MSSA-based pass finds the +; cross-block store and promotes the argument (see the CHECK lines below).
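+; (MemorySSA def-use chains span the entire function, so the clobber walk +; reaches the defining store regardless of which block contains it.)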
+; REF-LABEL: define internal i32 @store_in_entry_block(i1 %arg0, i32* %out) #0 { +; REF-NOT: call +define internal i32 @store_in_entry_block(i1 %arg0, i32* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@store_in_entry_block +; CHECK-SAME: (i1 [[ARG0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAL0:%.*]] = load i32, i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: br i1 [[ARG0]], label [[IF:%.*]], label [[ENDIF:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL1:%.*]] = load i32, i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: br label [[ENDIF]] +; CHECK: endif: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL1]], [[IF]] ] +; CHECK-NEXT: [[STORE_IN_ENTRY_BLOCK_ENDIF_RET:%.*]] = insertvalue { i32, i32 } undef, i32 [[PHI]], 0 +; CHECK-NEXT: [[STORE_IN_ENTRY_BLOCK_ENDIF_RET1:%.*]] = insertvalue { i32, i32 } [[STORE_IN_ENTRY_BLOCK_ENDIF_RET]], i32 [[VAL0]], 1 +; CHECK-NEXT: ret { i32, i32 } [[STORE_IN_ENTRY_BLOCK_ENDIF_RET1]] +; +entry: + %val0 = load i32, i32 addrspace(1)* undef + store i32 %val0, i32* %out + br i1 %arg0, label %if, label %endif + +if: + %val1 = load i32, i32 addrspace(1)* undef + br label %endif + +endif: + %phi = phi i32 [ 0, %entry ], [ %val1, %if ] + ret i32 %phi +} + +define i32 @store_in_entry_block_caller(i1 %arg0, i32* %out) { +; CHECK-LABEL: define {{[^@]+}}@store_in_entry_block_caller +; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[OUT:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call { i32, i32 } @store_in_entry_block(i1 [[ARG0]]) +; CHECK-NEXT: [[R_RET:%.*]] = extractvalue { i32, i32 } [[R]], 0 +; CHECK-NEXT: [[OUT_VAL_RET:%.*]] = extractvalue { i32, i32 } [[R]], 1 +; CHECK-NEXT: store i32 [[OUT_VAL_RET]], i32* [[OUT]], align 4 +; CHECK-NEXT: ret i32 [[R_RET]] +; + %R = call i32 @store_in_entry_block(i1 %arg0, i32* %out) + ret i32 %R +} + +; REF-LABEL: define private %i1_one_out_arg_i32_1_use @i1_one_out_arg_i32_1_use.body(i32* %val) #0 { +; REF-NEXT: ret %i1_one_out_arg_i32_1_use { i1 true, i32 24 } + +; REF-LABEL: define internal i1 @i1_one_out_arg_i32_1_use(i32* %0) #2 { +; REF: %2 = call %i1_one_out_arg_i32_1_use @i1_one_out_arg_i32_1_use.body(i32* undef) +; REF: %3 = extractvalue %i1_one_out_arg_i32_1_use %2, 1 +; REF: store i32 %3, i32* %0, align 4 +; REF: %4 = extractvalue %i1_one_out_arg_i32_1_use %2, 0 +; REF: ret i1 %4 +define internal i1 @i1_one_out_arg_i32_1_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[I1_ONE_OUT_ARG_I32_1_USE_RET:%.*]] = insertvalue { i1, i32 } undef, i1 true, 0 +; CHECK-NEXT: [[I1_ONE_OUT_ARG_I32_1_USE_RET1:%.*]] = insertvalue { i1, i32 } [[I1_ONE_OUT_ARG_I32_1_USE_RET]], i32 24, 1 +; CHECK-NEXT: ret { i1, i32 } [[I1_ONE_OUT_ARG_I32_1_USE_RET1]] +; + store i32 24, i32* %val + ret i1 true +} + +define i1 @i1_one_out_arg_i32_1_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call { i1, i32 } @i1_one_out_arg_i32_1_use() +; CHECK-NEXT: [[R_RET:%.*]] = extractvalue { i1, i32 } [[R]], 0 +; CHECK-NEXT: [[VAL_VAL_RET:%.*]] = extractvalue { i1, i32 } [[R]], 1 +; CHECK-NEXT: store i32 [[VAL_VAL_RET]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret i1 [[R_RET]] +; + %R = call i1 @i1_one_out_arg_i32_1_use(i32* %val) + ret i1 %R +} + +; *** The MSSA pass simply refuses to promote if the return value has an attribute. +; Make sure we don't leave around return attributes that are +; incompatible with struct return types.
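+; (zeroext would not be a valid return attribute on the promoted { i1, i32 } struct type, so the function below has to stay unpromoted; the CHECK lines show it unchanged.)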
+ +; REF-LABEL: define private %i1_zeroext_one_out_arg_i32_1_use @i1_zeroext_one_out_arg_i32_1_use.body(i32* %val) #0 { +; REF-NEXT: ret %i1_zeroext_one_out_arg_i32_1_use { i1 true, i32 24 } + +; REF-LABEL: define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(i32* %0) #2 { +; REF-NEXT: %2 = call %i1_zeroext_one_out_arg_i32_1_use @i1_zeroext_one_out_arg_i32_1_use.body(i32* undef) +; REF-NEXT: %3 = extractvalue %i1_zeroext_one_out_arg_i32_1_use %2, 1 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: %4 = extractvalue %i1_zeroext_one_out_arg_i32_1_use %2, 0 +; REF-NEXT: ret i1 %4 +define internal zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 24, i32* [[VAL]], align 4 +; CHECK-NEXT: ret i1 true +; + store i32 24, i32* %val + ret i1 true +} + +define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(i32* [[VAL]]) +; CHECK-NEXT: ret i1 [[R]] +; + %R = call zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(i32* %val) + ret i1 %R +} + +; *** The same as previous +; REF-LABEL: define private %i1_signext_one_out_arg_i32_1_use @i1_signext_one_out_arg_i32_1_use.body(i32* %val) #0 { +; REF-NEXT: ret %i1_signext_one_out_arg_i32_1_use { i1 true, i32 24 } + +; REF-LABEL: define signext i1 @i1_signext_one_out_arg_i32_1_use(i32* %0) #2 { +; REF-NEXT: %2 = call %i1_signext_one_out_arg_i32_1_use @i1_signext_one_out_arg_i32_1_use.body(i32* undef) +; REF-NEXT: %3 = extractvalue %i1_signext_one_out_arg_i32_1_use %2, 1 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: %4 = extractvalue %i1_signext_one_out_arg_i32_1_use %2, 0 +; REF-NEXT: ret i1 %4 +define internal signext i1 @i1_signext_one_out_arg_i32_1_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 24, i32* [[VAL]], align 4 +; CHECK-NEXT: ret i1 true +; + store i32 24, i32* %val + ret i1 true +} + +define signext i1 @i1_signext_one_out_arg_i32_1_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call signext i1 @i1_signext_one_out_arg_i32_1_use(i32* [[VAL]]) +; CHECK-NEXT: ret i1 [[R]] +; + %R = call signext i1 @i1_signext_one_out_arg_i32_1_use(i32* %val) + ret i1 %R +} + +; *** The same as previous +; REF-LABEL: define private %p1i32_noalias_one_out_arg_i32_1_use @p1i32_noalias_one_out_arg_i32_1_use.body(i32* %val) #0 { +; REF-NEXT: ret %p1i32_noalias_one_out_arg_i32_1_use { i32 addrspace(1)* null, i32 24 } + +; REF-LABEL: define noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use(i32* %0) #2 { +; REF-NEXT: %2 = call %p1i32_noalias_one_out_arg_i32_1_use @p1i32_noalias_one_out_arg_i32_1_use.body(i32* undef) +; REF-NEXT: %3 = extractvalue %p1i32_noalias_one_out_arg_i32_1_use %2, 1 +; REF-NEXT: store i32 %3, i32* %0, align 4 +; REF-NEXT: %4 = extractvalue %p1i32_noalias_one_out_arg_i32_1_use %2, 0 +; REF-NEXT: ret i32 addrspace(1)* %4 +define internal noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use(i32* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use +; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store i32 24, i32* 
[[VAL]], align 4 +; CHECK-NEXT: ret i32 addrspace(1)* null +; + store i32 24, i32* %val + ret i32 addrspace(1)* null +} + +define noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use_caller(i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use_caller +; CHECK-SAME: (i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use(i32* [[VAL]]) +; CHECK-NEXT: ret i32 addrspace(1)* [[R]] +; + %R = call noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use(i32* %val) + ret i32 addrspace(1)* %R +} + +; *** unclear why this wasn't handled before +; REF-LABEL: define internal void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 { +; REF-NEXT: store i32 0, i32 addrspace(1)* %val +; REF-NEXT: ret void +define internal void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_non_private_arg_i32_1_use +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: ret i32 0 +; + store i32 0, i32 addrspace(1)* %val + ret void +} + +define void @void_one_out_non_private_arg_i32_1_use_caller(i32 addrspace(1)* %val) { +; CHECK-LABEL: define {{[^@]+}}@void_one_out_non_private_arg_i32_1_use_caller +; CHECK-SAME: (i32 addrspace(1)* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @void_one_out_non_private_arg_i32_1_use() +; CHECK-NEXT: store i32 [[TMP1]], i32 addrspace(1)* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) + ret void +} + +; REF-LABEL: define private %func_ptr_type @func_ptr_type.body(void ()** %out) #0 { +; REF-LABEL: define internal void @func_ptr_type(void ()** %0) #2 { +; REF: %2 = call %func_ptr_type @func_ptr_type.body(void ()** undef) +define internal void @func_ptr_type(void()** %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@func_ptr_type +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: [[FUNC:%.*]] = load void ()*, void ()** undef, align 8 +; CHECK-NEXT: ret void ()* [[FUNC]] +; + %func = load void()*, void()** undef + store void()* %func, void()** %out + ret void +} + +define void @func_ptr_type_caller(void()** %out) { +; CHECK-LABEL: define {{[^@]+}}@func_ptr_type_caller +; CHECK-SAME: (void ()** [[OUT:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call void ()* @func_ptr_type() +; CHECK-NEXT: store void ()* [[TMP1]], void ()** [[OUT]], align 8 +; CHECK-NEXT: ret void +; + call void @func_ptr_type(void()** %out) + ret void +} + +; *** TODO: bitcasts not supported yet +; REF-LABEL: define private %bitcast_func_ptr_type @bitcast_func_ptr_type.body(void ()** %out) #0 { +; REF-LABEL: define internal void @bitcast_func_ptr_type(void ()** %0) #2 { +define internal void @bitcast_func_ptr_type(void()** %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type +; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[FUNC:%.*]] = load i32 ()*, i32 ()** undef, align 8 +; CHECK-NEXT: [[CAST:%.*]] = bitcast void ()** [[OUT]] to i32 ()** +; CHECK-NEXT: store i32 ()* [[FUNC]], i32 ()** [[CAST]], align 8 +; CHECK-NEXT: ret void +; + %func = load i32()*, i32()** undef + %cast = bitcast void()** %out to i32()** + store i32()* %func, i32()** %cast + ret void +} + +define void @bitcast_func_ptr_type_caller(void()** %out) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type_caller +; CHECK-SAME: (void ()** [[OUT:%.*]]) { +; CHECK-NEXT: call void @bitcast_func_ptr_type(void ()** [[OUT]]) +; CHECK-NEXT: ret void +; + call void @bitcast_func_ptr_type(void()**
%out) + ret void +} + +; *** TODO: this return type is not supported yet +; REF-LABEL: define private %out_arg_small_array @out_arg_small_array.body([4 x i32]* %val) #0 { +; REF-NEXT: ret %out_arg_small_array { [4 x i32] [i32 0, i32 1, i32 2, i32 3] } +; REF-LABEL: define internal void @out_arg_small_array([4 x i32]* %0) #2 { +define internal void @out_arg_small_array([4 x i32]* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array +; CHECK-SAME: ([4 x i32]* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store [4 x i32] [i32 0, i32 1, i32 2, i32 3], [4 x i32]* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + store [4 x i32] [i32 0, i32 1, i32 2, i32 3], [4 x i32]* %val + ret void +} + +define void @out_arg_small_array_caller([4 x i32]* %val) { +; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array_caller +; CHECK-SAME: ([4 x i32]* [[VAL:%.*]]) { +; CHECK-NEXT: call void @out_arg_small_array([4 x i32]* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @out_arg_small_array([4 x i32]* %val) + ret void +} + +; *** TODO: check whether the behaviour is the same once the previous testcase is supported +; REF-LABEL: define internal void @out_arg_large_array([17 x i32]* %val) #0 { +; REF-NEXT: store [17 x i32] zeroinitializer, [17 x i32]* %val +; REF-NEXT: ret void +define internal void @out_arg_large_array([17 x i32]* %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@out_arg_large_array +; CHECK-SAME: ([17 x i32]* [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: store [17 x i32] zeroinitializer, [17 x i32]* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + store [17 x i32] zeroinitializer, [17 x i32]* %val + ret void +} + +define void @out_arg_large_array_caller([17 x i32]* %val) { +; CHECK-LABEL: define {{[^@]+}}@out_arg_large_array_caller +; CHECK-SAME: ([17 x i32]* [[VAL:%.*]]) { +; CHECK-NEXT: call void @out_arg_large_array([17 x i32]* [[VAL]]) +; CHECK-NEXT: ret void +; + call void @out_arg_large_array([17 x i32]* %val) + ret void +} + +; *** different: no checks on register usage yet +; REF-LABEL: define <16 x i32> @num_regs_return_limit(i32* %out, i32 %val) #0 { +define internal <16 x i32> @num_regs_return_limit(i32* %out, i32 %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@num_regs_return_limit +; CHECK-SAME: (i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile <16 x i32>, <16 x i32> addrspace(1)* undef, align 64 +; CHECK-NEXT: [[NUM_REGS_RETURN_LIMIT_RET:%.*]] = insertvalue { <16 x i32>, i32 } undef, <16 x i32> [[LOAD]], 0 +; CHECK-NEXT: [[NUM_REGS_RETURN_LIMIT_RET1:%.*]] = insertvalue { <16 x i32>, i32 } [[NUM_REGS_RETURN_LIMIT_RET]], i32 [[VAL]], 1 +; CHECK-NEXT: ret { <16 x i32>, i32 } [[NUM_REGS_RETURN_LIMIT_RET1]] +; + %load = load volatile <16 x i32>, <16 x i32> addrspace(1)* undef + store i32 %val, i32* %out + ret <16 x i32> %load +} + +define <16 x i32> @num_regs_return_limit_caller(i32* %out, i32 %val) { +; CHECK-LABEL: define {{[^@]+}}@num_regs_return_limit_caller +; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call { <16 x i32>, i32 } @num_regs_return_limit(i32 [[VAL]]) +; CHECK-NEXT: [[R_RET:%.*]] = extractvalue { <16 x i32>, i32 } [[R]], 0 +; CHECK-NEXT: [[OUT_VAL_RET:%.*]] = extractvalue { <16 x i32>, i32 } [[R]], 1 +; CHECK-NEXT: store i32 [[OUT_VAL_RET]], i32* [[OUT]], align 4 +; CHECK-NEXT: ret <16 x i32> [[R_RET]] +; + %R = call <16 x i32> @num_regs_return_limit(i32* %out, i32 %val) + ret <16 x i32> %R +} + +; *** different: no checks on register usage yet +; REF-LABEL: define private %num_regs_reach_limit @num_regs_reach_limit.body(i32* %out, i32 %val) #0 { +; REF:
define [15 x i32] @num_regs_reach_limit(i32* %0, i32 %1) #2 { +; REF-NEXT: call %num_regs_reach_limit @num_regs_reach_limit.body(i32* undef, i32 %1) +define internal [15 x i32] @num_regs_reach_limit(i32* %out, i32 %val) #0 { +; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit +; CHECK-SAME: (i32 [[VAL:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* undef, align 4 +; CHECK-NEXT: [[NUM_REGS_REACH_LIMIT_RET:%.*]] = insertvalue { [15 x i32], i32 } undef, [15 x i32] [[LOAD]], 0 +; CHECK-NEXT: [[NUM_REGS_REACH_LIMIT_RET1:%.*]] = insertvalue { [15 x i32], i32 } [[NUM_REGS_REACH_LIMIT_RET]], i32 [[VAL]], 1 +; CHECK-NEXT: ret { [15 x i32], i32 } [[NUM_REGS_REACH_LIMIT_RET1]] +; + %load = load volatile [15 x i32], [15 x i32] addrspace(1)* undef + store i32 %val, i32* %out + ret [15 x i32] %load +} + +define [15 x i32] @num_regs_reach_limit_caller(i32* %out, i32 %val) { +; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_caller +; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call { [15 x i32], i32 } @num_regs_reach_limit(i32 [[VAL]]) +; CHECK-NEXT: [[R_RET:%.*]] = extractvalue { [15 x i32], i32 } [[R]], 0 +; CHECK-NEXT: [[OUT_VAL_RET:%.*]] = extractvalue { [15 x i32], i32 } [[R]], 1 +; CHECK-NEXT: store i32 [[OUT_VAL_RET]], i32* [[OUT]], align 4 +; CHECK-NEXT: ret [15 x i32] [[R_RET]] +; + %R = call [15 x i32] @num_regs_reach_limit(i32* %out, i32 %val) + ret [15 x i32] %R +} + +; *** different: no checks on register usage yet +; REF-LABEL: define private %num_regs_reach_limit_leftover @num_regs_reach_limit_leftover.body(i32* %out0, i32* %out1, i32 %val0) #0 { +; REF-NEXT: %load0 = load volatile [15 x i32], [15 x i32] addrspace(1)* undef +; REF-NEXT: %load1 = load volatile i32, i32 addrspace(1)* undef +; REF-NEXT: %1 = insertvalue %num_regs_reach_limit_leftover undef, [15 x i32] %load0, 0 +; REF-NEXT: %2 = insertvalue %num_regs_reach_limit_leftover %1, i32 %load1, 1 +; REF-NEXT: %3 = insertvalue %num_regs_reach_limit_leftover %2, i32 %val0, 2 +; REF-NEXT: ret %num_regs_reach_limit_leftover %3 + +; REF-LABEL: define [15 x i32] @num_regs_reach_limit_leftover(i32* %0, i32* %1, i32 %2) #2 { +; REF-NEXT: %4 = call %num_regs_reach_limit_leftover @num_regs_reach_limit_leftover.body(i32* undef, i32* undef, i32 %2) +; REF-NEXT: %5 = extractvalue %num_regs_reach_limit_leftover %4, 1 +; REF-NEXT: store i32 %5, i32* %0, align 4 +; REF-NEXT: %6 = extractvalue %num_regs_reach_limit_leftover %4, 2 +; REF-NEXT: store i32 %6, i32* %1, align 4 +; REF-NEXT: %7 = extractvalue %num_regs_reach_limit_leftover %4, 0 +; REF-NEXT: ret [15 x i32] %7 +define internal [15 x i32] @num_regs_reach_limit_leftover(i32* %out0, i32* %out1, i32 %val0) #0 { +; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover +; CHECK-SAME: (i32 [[VAL0:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD0:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* undef, align 4 +; CHECK-NEXT: [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: [[NUM_REGS_REACH_LIMIT_LEFTOVER_RET:%.*]] = insertvalue { [15 x i32], i32, i32 } undef, [15 x i32] [[LOAD0]], 0 +; CHECK-NEXT: [[NUM_REGS_REACH_LIMIT_LEFTOVER_RET1:%.*]] = insertvalue { [15 x i32], i32, i32 } [[NUM_REGS_REACH_LIMIT_LEFTOVER_RET]], i32 [[VAL0]], 1 +; CHECK-NEXT: [[NUM_REGS_REACH_LIMIT_LEFTOVER_RET2:%.*]] = insertvalue { [15 x i32], i32, i32 } [[NUM_REGS_REACH_LIMIT_LEFTOVER_RET1]], i32 [[LOAD1]], 2 +; CHECK-NEXT: ret { [15 x i32], i32, i32 } [[NUM_REGS_REACH_LIMIT_LEFTOVER_RET2]] +; 
+ %load0 = load volatile [15 x i32], [15 x i32] addrspace(1)* undef + %load1 = load volatile i32, i32 addrspace(1)* undef + store i32 %val0, i32* %out0 + store i32 %load1, i32* %out1 + ret [15 x i32] %load0 +} + +define [15 x i32] @num_regs_reach_limit_leftover_caller(i32* %out0, i32* %out1, i32 %val0) { +; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover_caller +; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]], i32 [[VAL0:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = call { [15 x i32], i32, i32 } @num_regs_reach_limit_leftover(i32 [[VAL0]]) +; CHECK-NEXT: [[R_RET:%.*]] = extractvalue { [15 x i32], i32, i32 } [[R]], 0 +; CHECK-NEXT: [[OUT0_VAL_RET:%.*]] = extractvalue { [15 x i32], i32, i32 } [[R]], 1 +; CHECK-NEXT: store i32 [[OUT0_VAL_RET]], i32* [[OUT0]], align 4 +; CHECK-NEXT: [[OUT1_VAL_RET:%.*]] = extractvalue { [15 x i32], i32, i32 } [[R]], 2 +; CHECK-NEXT: store i32 [[OUT1_VAL_RET]], i32* [[OUT1]], align 4 +; CHECK-NEXT: ret [15 x i32] [[R_RET]] +; + %R = call [15 x i32] @num_regs_reach_limit_leftover(i32* %out0, i32* %out1, i32 %val0) + ret [15 x i32] %R +} + +; *** TODO: preserve debug info +; REF-LABEL: define private %preserve_debug_info @preserve_debug_info.body(i32 %arg0, i32* %val) #0 { +; REF-NEXT: call void @may.clobber(), !dbg !5 +; REF-NEXT: %1 = insertvalue %preserve_debug_info undef, i32 %arg0, 0, !dbg !11 +; REF-NEXT: ret %preserve_debug_info %1, !dbg !11 + +; REF-LABEL: define internal void @preserve_debug_info(i32 %0, i32* %1) #2 !dbg !6 { +; REF-NEXT: %3 = call %preserve_debug_info @preserve_debug_info.body(i32 %0, i32* undef){{$}} +; REF-NEXT: %4 = extractvalue %preserve_debug_info %3, 0{{$}} +; REF-NEXT: store i32 %4, i32* %1, align 4{{$}} +; REF-NEXT: ret void +define internal void @preserve_debug_info(i32 %arg0, i32* %val) #0 !dbg !5 { +; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info +; CHECK-SAME: (i32 [[ARG0:%.*]]) #[[ATTR0]] !dbg [[DBG5:![0-9]+]] { +; CHECK-NEXT: call void @may.clobber(), !dbg [[DBG10:![0-9]+]] +; CHECK-NEXT: ret i32 [[ARG0]] +; + call void @may.clobber(), !dbg !10 + store i32 %arg0, i32* %val, !dbg !11 + ret void, !dbg !12 +} + +define void @preserve_debug_info_caller(i32 %arg0, i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info_caller +; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @preserve_debug_info(i32 [[ARG0]]) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @preserve_debug_info(i32 %arg0, i32* %val) + ret void +} + +define internal void @preserve_metadata(i32 %arg0, i32* %val) #0 !kernel_arg_access_qual !13 { +; CHECK-LABEL: define {{[^@]+}}@preserve_metadata +; CHECK-SAME: (i32 [[ARG0:%.*]]) #[[ATTR0]] !kernel_arg_access_qual !11 { +; CHECK-NEXT: call void @may.clobber() +; CHECK-NEXT: ret i32 [[ARG0]] +; + call void @may.clobber() + store i32 %arg0, i32* %val + ret void +} + +define void @preserve_metadata_caller(i32 %arg0, i32* %val) { +; CHECK-LABEL: define {{[^@]+}}@preserve_metadata_caller +; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @preserve_metadata(i32 [[ARG0]]) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[VAL]], align 4 +; CHECK-NEXT: ret void +; + call void @preserve_metadata(i32 %arg0, i32* %val) + ret void +} + +; *** TODO: bitcast support +; Clang emits this pattern for 3-vectors for some reason. 
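+; (promoting this store would require shrinking the <4 x i32> back to <3 x i32>; the REF lines below show the old pass doing that with a shufflevector before building the return struct.)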
+; REF-LABEL: define private %bitcast_pointer_v4i32_v3i32 @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* %out) #0 { +; REF-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef +; REF-NEXT: %bitcast = bitcast <3 x i32>* %out to <4 x i32>* +; REF-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> +; REF-NEXT: %2 = insertvalue %bitcast_pointer_v4i32_v3i32 undef, <3 x i32> %1, 0 +; REF-NEXT: ret %bitcast_pointer_v4i32_v3i32 %2 + +; REF-LABEL: define internal void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %0) #2 { +; REF-NEXT: %2 = call %bitcast_pointer_v4i32_v3i32 @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* undef) +; REF-NEXT: %3 = extractvalue %bitcast_pointer_v4i32_v3i32 %2, 0 +; REF-NEXT: store <3 x i32> %3, <3 x i32>* %0, align 16 +; REF-NEXT: ret void +define internal void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32 +; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x i32>* [[OUT]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[LOAD]], <4 x i32>* [[BITCAST]], align 16 +; CHECK-NEXT: ret void +; + %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef + %bitcast = bitcast <3 x i32>* %out to <4 x i32>* + store <4 x i32> %load, <4 x i32>* %bitcast + ret void +} + +define void @bitcast_pointer_v4i32_v3i32_caller(<3 x i32>* %out) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32_caller +; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) { +; CHECK-NEXT: call void @bitcast_pointer_v4i32_v3i32(<3 x i32>* [[OUT]]) +; CHECK-NEXT: ret void +; + call void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %out) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_pointer_v4i32_v3f32 @bitcast_pointer_v4i32_v3f32.body(<3 x float>* %out) #0 { +; REF-NEXT: %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef +; REF-NEXT: %bitcast = bitcast <3 x float>* %out to <4 x i32>* +; REF-NEXT: %1 = shufflevector <4 x i32> %load, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> +; REF-NEXT: %2 = bitcast <3 x i32> %1 to <3 x float> +; REF-NEXT: %3 = insertvalue %bitcast_pointer_v4i32_v3f32 undef, <3 x float> %2, 0 +; REF-NEXT: ret %bitcast_pointer_v4i32_v3f32 %3 +define internal void @bitcast_pointer_v4i32_v3f32(<3 x float>* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32 +; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <3 x float>* [[OUT]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[LOAD]], <4 x i32>* [[BITCAST]], align 16 +; CHECK-NEXT: ret void +; + %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef + %bitcast = bitcast <3 x float>* %out to <4 x i32>* + store <4 x i32> %load, <4 x i32>* %bitcast + ret void +} + +define void @bitcast_pointer_v4i32_v3f32_caller(<3 x float>* %out) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32_caller +; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) { +; CHECK-NEXT: call void @bitcast_pointer_v4i32_v3f32(<3 x float>* [[OUT]]) +; CHECK-NEXT: ret void +; + call void @bitcast_pointer_v4i32_v3f32(<3 x float>* %out) + ret void +} + +; *** TODO: bitcast support +; Try different element and bitwidths which could produce broken +; casts.
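+; (e.g. the i32 loaded below is stored through a float*, so promotion would need a value-level bitcast to float before returning it, as in the REF lines.)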
+ +; REF-LABEL: define private %bitcast_pointer_i32_f32 @bitcast_pointer_i32_f32.body(float* %out) #0 { +; REF-NEXT: %load = load volatile i32, i32 addrspace(1)* undef +; REF-NEXT: %bitcast = bitcast float* %out to i32* +; REF-NEXT: %1 = bitcast i32 %load to float +; REF-NEXT: %2 = insertvalue %bitcast_pointer_i32_f32 undef, float %1, 0 +; REF-NEXT: ret %bitcast_pointer_i32_f32 %2 + +; REF-LABEL: define internal void @bitcast_pointer_i32_f32(float* %0) #2 { +; REF-NEXT: %2 = call %bitcast_pointer_i32_f32 @bitcast_pointer_i32_f32.body(float* undef) +; REF-NEXT: %3 = extractvalue %bitcast_pointer_i32_f32 %2, 0 +; REF-NEXT: store float %3, float* %0, align 4 +define internal void @bitcast_pointer_i32_f32(float* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32 +; CHECK-SAME: (float* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float* [[OUT]] to i32* +; CHECK-NEXT: store i32 [[LOAD]], i32* [[BITCAST]], align 4 +; CHECK-NEXT: ret void +; + %load = load volatile i32, i32 addrspace(1)* undef + %bitcast = bitcast float* %out to i32* + store i32 %load, i32* %bitcast + ret void +} + +define void @bitcast_pointer_i32_f32_caller(float* %out) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32_caller +; CHECK-SAME: (float* [[OUT:%.*]]) { +; CHECK-NEXT: call void @bitcast_pointer_i32_f32(float* [[OUT]]) +; CHECK-NEXT: ret void +; + call void @bitcast_pointer_i32_f32(float* %out) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define internal void @bitcast_pointer_i32_f16(half* %out) #0 { +; REF-NOT: call +define internal void @bitcast_pointer_i32_f16(half* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16 +; CHECK-SAME: (half* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast half* [[OUT]] to i32* +; CHECK-NEXT: store i32 [[LOAD]], i32* [[BITCAST]], align 4 +; CHECK-NEXT: ret void +; + %load = load volatile i32, i32 addrspace(1)* undef + %bitcast = bitcast half* %out to i32* + store i32 %load, i32* %bitcast + ret void +} + +define void @bitcast_pointer_i32_f16_caller(half* %out) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16_caller +; CHECK-SAME: (half* [[OUT:%.*]]) { +; CHECK-NEXT: call void @bitcast_pointer_i32_f16(half* [[OUT]]) +; CHECK-NEXT: ret void +; + call void @bitcast_pointer_i32_f16(half* %out) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define internal void @bitcast_pointer_f16_i32(i32* %out) #0 { +; REF-NOT: call +define internal void @bitcast_pointer_f16_i32(i32* %out) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32 +; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LOAD:%.*]] = load volatile half, half addrspace(1)* undef, align 2 +; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[OUT]] to half* +; CHECK-NEXT: store half [[LOAD]], half* [[BITCAST]], align 2 +; CHECK-NEXT: ret void +; + %load = load volatile half, half addrspace(1)* undef + %bitcast = bitcast i32* %out to half* + store half %load, half* %bitcast + ret void +} + +define void @bitcast_pointer_f16_i32_caller(i32* %out) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32_caller +; CHECK-SAME: (i32* [[OUT:%.*]]) { +; CHECK-NEXT: call void @bitcast_pointer_f16_i32(i32* [[OUT]]) +; CHECK-NEXT: ret void +; + call void @bitcast_pointer_f16_i32(i32* %out) + ret void +} + +%struct.i128 = type { i128 } +%struct.v2f32 = type 
{ <2 x float> } +%struct.v3f32 = type { <3 x float> } +%struct.v3f32.f32 = type { <3 x float>, float } +%struct.v4f32 = type { <4 x float> } + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_struct_v3f32_v3f32 @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* %out, <3 x float> %value) #0 { +; REF-NEXT: %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; REF-NEXT: %cast = bitcast %struct.v3f32* %out to <4 x float>* +; REF-NEXT: %1 = shufflevector <4 x float> %extractVec, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> +; REF-NEXT: %2 = insertvalue %struct.v3f32 undef, <3 x float> %1, 0 +; REF-NEXT: %3 = insertvalue %bitcast_struct_v3f32_v3f32 undef, %struct.v3f32 %2, 0 +; REF-NEXT: ret %bitcast_struct_v3f32_v3f32 %3 + +; REF-LABEL: define internal void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %0, <3 x float> %1) #2 { +; REF-NEXT: %3 = call %bitcast_struct_v3f32_v3f32 @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* undef, <3 x float> %1) +; REF-NEXT: %4 = extractvalue %bitcast_struct_v3f32_v3f32 %3, 0 +; REF-NEXT: store %struct.v3f32 %4, %struct.v3f32* %0, align 16 +; REF-NEXT: ret void +define internal void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32 +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], <4 x float>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> + %cast = bitcast %struct.v3f32* %out to <4 x float>* + store <4 x float> %extractVec, <4 x float>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v3f32_v3f32_caller(%struct.v3f32* %out, <3 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32_caller +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v3f32_v3f32(%struct.v3f32* [[OUT]], <3 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_struct_v3f32_v3i32 @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* %out, <3 x i32> %value) #0 { +; REF-NEXT: %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; REF-NEXT: %cast = bitcast %struct.v3f32* %out to <4 x i32>* +; REF-NEXT: %1 = shufflevector <4 x i32> %extractVec, <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2> +; REF-NEXT: %2 = bitcast <3 x i32> %1 to <3 x float> +; REF-NEXT: %3 = insertvalue %struct.v3f32 undef, <3 x float> %2, 0 +; REF-NEXT: %4 = insertvalue %bitcast_struct_v3f32_v3i32 undef, %struct.v3f32 %3, 0 +; REF-NEXT: ret %bitcast_struct_v3f32_v3i32 %4 + +; REF-LABEL: define internal void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %0, <3 x i32> %1) #2 { +; REF-NEXT: %3 = call %bitcast_struct_v3f32_v3i32 @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* undef, <3 x i32> %1) +; REF-NEXT: %4 = extractvalue %bitcast_struct_v3f32_v3i32 %3, 0 +; REF-NEXT: store %struct.v3f32 %4, %struct.v3f32* %0, align 16 +define internal void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %out, <3 x i32> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32 +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]])
#[[ATTR0]] { +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[EXTRACTVEC]], <4 x i32>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> + %cast = bitcast %struct.v3f32* %out to <4 x i32>* + store <4 x i32> %extractVec, <4 x i32>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v3f32_v3i32_caller(%struct.v3f32* %out, <3 x i32> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32_caller +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v3f32_v3i32(%struct.v3f32* [[OUT]], <3 x i32> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %out, <3 x i32> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_struct_v4f32_v4f32 @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* %out, <4 x float> %value) #0 { +; REF-NEXT: %cast = bitcast %struct.v4f32* %out to <4 x float>* +; REF-NEXT: %1 = insertvalue %struct.v4f32 undef, <4 x float> %value, 0 +; REF-NEXT: %2 = insertvalue %bitcast_struct_v4f32_v4f32 undef, %struct.v4f32 %1, 0 +; REF-NEXT: ret %bitcast_struct_v4f32_v4f32 %2 + +; REF-LABEL: define internal void @bitcast_struct_v4f32_v4f32(%struct.v4f32* %0, <4 x float> %1) #2 { +; REF-NEXT: %3 = call %bitcast_struct_v4f32_v4f32 @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* undef, <4 x float> %1) +define internal void @bitcast_struct_v4f32_v4f32(%struct.v4f32* %out, <4 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32 +; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[VALUE]], <4 x float>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %cast = bitcast %struct.v4f32* %out to <4 x float>* + store <4 x float> %value, <4 x float>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v4f32_v4f32_caller(%struct.v4f32* %out, <4 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32_caller +; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v4f32_v4f32(%struct.v4f32* [[OUT]], <4 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v4f32_v4f32(%struct.v4f32* %out, <4 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_struct_v3f32_v4i32 @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* %out, <4 x i32> %value) #0 { +; REF-LABEL: define internal void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %0, <4 x i32> %1) #2 { +define internal void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %out, <4 x i32> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32 +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[VALUE]], <4 x i32>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %cast = bitcast %struct.v3f32* %out to <4 x i32>* + store <4 x i32> %value, <4 x i32>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v3f32_v4i32_caller(%struct.v3f32* %out, <4 x i32> %value) { +; CHECK-LABEL: define
{{[^@]+}}@bitcast_struct_v3f32_v4i32_caller +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v3f32_v4i32(%struct.v3f32* [[OUT]], <4 x i32> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %out, <4 x i32> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_struct_v4f32_v3f32 @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* %out, <3 x float> %value) #0 { +; REF-LABEL: define internal void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %0, <3 x float> %1) #2 { +define internal void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %out, <3 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32 +; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], <4 x float>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> + %cast = bitcast %struct.v4f32* %out to <4 x float>* + store <4 x float> %extractVec, <4 x float>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v4f32_v3f32_caller(%struct.v4f32* %out, <3 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32_caller +; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v4f32_v3f32(%struct.v4f32* [[OUT]], <3 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %out, <3 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define internal void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value) #0 { +; REF-NOT: call +define internal void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32 +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <2 x float>* +; CHECK-NEXT: store <2 x float> [[VALUE]], <2 x float>* [[CAST]], align 8 +; CHECK-NEXT: ret void +; + %cast = bitcast %struct.v3f32* %out to <2 x float>* + store <2 x float> %value, <2 x float>* %cast, align 8 + ret void +} + +define void @bitcast_struct_v3f32_v2f32_caller(%struct.v3f32* %out, <2 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32_caller +; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v3f32_v2f32(%struct.v3f32* [[OUT]], <2 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define internal void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 { +; REF-NOT: call +define internal void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32 +; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x
float>* +; CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], <4 x float>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> + %cast = bitcast %struct.v3f32.f32* %out to <4 x float>* + store <4 x float> %extractVec, <4 x float>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v3f32_f32_v3f32_caller(%struct.v3f32.f32* %out, <3 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32_caller +; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* [[OUT]], <3 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define internal void @bitcast_struct_v3f32_f32_v4f32(%struct.v3f32.f32* %out, <4 x float> %value) #0 { +; REF-NOT: call +define internal void @bitcast_struct_v3f32_f32_v4f32(%struct.v3f32.f32* %out, <4 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32 +; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[VALUE]], <4 x float>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %cast = bitcast %struct.v3f32.f32* %out to <4 x float>* + store <4 x float> %value, <4 x float>* %cast, align 16 + ret void +} + +define void @bitcast_struct_v3f32_f32_v4f32_caller(%struct.v3f32.f32* %out, <4 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32_caller +; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_v3f32_f32_v4f32(%struct.v3f32.f32* [[OUT]], <4 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_v3f32_f32_v4f32(%struct.v3f32.f32* %out, <4 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %bitcast_struct_i128_v4f32 @bitcast_struct_i128_v4f32.body(%struct.i128* %out, <4 x float> %value) #0 { +; REF-NEXT: %cast = bitcast %struct.i128* %out to <4 x float>* +; REF-NEXT: %1 = bitcast <4 x float> %value to i128 +; REF-NEXT: %2 = insertvalue %struct.i128 undef, i128 %1, 0 +; REF-NEXT: %3 = insertvalue %bitcast_struct_i128_v4f32 undef, %struct.i128 %2, 0 +; REF-NEXT: ret %bitcast_struct_i128_v4f32 %3 +define internal void @bitcast_struct_i128_v4f32(%struct.i128* %out, <4 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32 +; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast %struct.i128* [[OUT]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[VALUE]], <4 x float>* [[CAST]], align 16 +; CHECK-NEXT: ret void +; + %cast = bitcast %struct.i128* %out to <4 x float>* + store <4 x float> %value, <4 x float>* %cast, align 16 + ret void +} + +define void @bitcast_struct_i128_v4f32_caller(%struct.i128* %out, <4 x float> %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32_caller +; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_struct_i128_v4f32(%struct.i128* [[OUT]], <4 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_struct_i128_v4f32(%struct.i128* %out, <4 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define
internal void @bitcast_struct_i128_v4f32(%struct.i128* %0, <4 x float> %1) #2 { +; REF-NEXT: %3 = call %bitcast_struct_i128_v4f32 @bitcast_struct_i128_v4f32.body(%struct.i128* undef, <4 x float> %1) +define internal void @bitcast_array_v4i32_v4f32([4 x i32]* %out, [4 x float] %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32 +; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast [4 x i32]* [[OUT]] to [4 x float]* +; CHECK-NEXT: store [4 x float] [[VALUE]], [4 x float]* [[CAST]], align 4 +; CHECK-NEXT: ret void +; + %cast = bitcast [4 x i32]* %out to [4 x float]* + store [4 x float] %value, [4 x float]* %cast, align 4 + ret void +} + +define void @bitcast_array_v4i32_v4f32_caller([4 x i32]* %out, [4 x float] %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32_caller +; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_array_v4i32_v4f32([4 x i32]* [[OUT]], [4 x float] [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_array_v4i32_v4f32([4 x i32]* %out, [4 x float] %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define private %multi_return_bitcast_struct_v3f32_v3f32 @multi_return_bitcast_struct_v3f32_v3f32.body(i1 %cond, %struct.v3f32* %out, <3 x float> %value) #0 { +; REF: ret0: +; REF: %cast0 = bitcast %struct.v3f32* %out to <4 x float>* +; REF: %0 = shufflevector <4 x float> %extractVec, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> +; REF: %1 = insertvalue %struct.v3f32 undef, <3 x float> %0, 0 +; REF: %2 = insertvalue %multi_return_bitcast_struct_v3f32_v3f32 undef, %struct.v3f32 %1, 0 +; REF: ret %multi_return_bitcast_struct_v3f32_v3f32 %2 + +; REF: ret1: +; REF: %4 = insertvalue %struct.v3f32 undef, <3 x float> %3, 0 +; REF: %5 = insertvalue %multi_return_bitcast_struct_v3f32_v3f32 undef, %struct.v3f32 %4, 0 +; REF: ret %multi_return_bitcast_struct_v3f32_v3f32 %5 +define internal void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, %struct.v3f32* %out, <3 x float> %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32 +; CHECK-SAME: (i1 [[COND:%.*]], %struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND]], label [[RET0:%.*]], label [[RET1:%.*]] +; CHECK: ret0: +; CHECK-NEXT: [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> +; CHECK-NEXT: [[CAST0:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[EXTRACTVEC]], <4 x float>* [[CAST0]], align 16 +; CHECK-NEXT: ret void +; CHECK: ret1: +; CHECK-NEXT: [[CAST1:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>* +; CHECK-NEXT: [[LOAD:%.*]] = load <4 x float>, <4 x float> addrspace(1)* undef, align 16 +; CHECK-NEXT: store <4 x float> [[LOAD]], <4 x float>* [[CAST1]], align 16 +; CHECK-NEXT: ret void +; +entry: + br i1 %cond, label %ret0, label %ret1 + +ret0: + %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef> + %cast0 = bitcast %struct.v3f32* %out to <4 x float>* + store <4 x float> %extractVec, <4 x float>* %cast0, align 16 + ret void + +ret1: + %cast1 = bitcast %struct.v3f32* %out to <4 x float>* + %load = load <4 x float>, <4 x float> addrspace(1)* undef + store <4 x float> %load, <4 x float>* %cast1, align 16 + ret void +} + +define void @multi_return_bitcast_struct_v3f32_v3f32_caller(i1 %cond, %struct.v3f32* %out, <3 x float> %value) { +; CHECK-LABEL: define
{{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32_caller +; CHECK-SAME: (i1 [[COND:%.*]], %struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) { +; CHECK-NEXT: call void @multi_return_bitcast_struct_v3f32_v3f32(i1 [[COND]], %struct.v3f32* [[OUT]], <3 x float> [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, %struct.v3f32* %out, <3 x float> %value) + ret void +} + +; *** TODO: bitcast support +; REF-LABEL: define internal void @bitcast_v3f32_struct_v3f32(<3 x float>* %out, %struct.v3f32 %value) #0 { +; REF-NOT: call +define internal void @bitcast_v3f32_struct_v3f32(<3 x float>* %out, %struct.v3f32 %value) #0 { +; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32 +; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[CAST:%.*]] = bitcast <3 x float>* [[OUT]] to %struct.v3f32* +; CHECK-NEXT: store [[STRUCT_V3F32]] [[VALUE]], %struct.v3f32* [[CAST]], align 4 +; CHECK-NEXT: ret void +; + %cast = bitcast <3 x float>* %out to %struct.v3f32* + store %struct.v3f32 %value, %struct.v3f32* %cast, align 4 + ret void +} + +define void @bitcast_v3f32_struct_v3f32_caller(<3 x float>* %out, %struct.v3f32 %value) { +; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32_caller +; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) { +; CHECK-NEXT: call void @bitcast_v3f32_struct_v3f32(<3 x float>* [[OUT]], [[STRUCT_V3F32]] [[VALUE]]) +; CHECK-NEXT: ret void +; + call void @bitcast_v3f32_struct_v3f32(<3 x float>* %out, %struct.v3f32 %value) + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind noinline optnone } +attributes #2 = { alwaysinline nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "code-object-metadata-kernel-debug-props.cl", directory: "/some/random/directory") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 2} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2) +!6 = !DISubroutineType(types: !7) +!7 = !{null, !8} +!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64) +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !DILocation(line: 2, column: 3, scope: !5) +!11 = !DILocation(line: 2, column: 8, scope: !5) +!12 = !DILocation(line: 3, column: 3, scope: !5) +!13 = !{!"none"} diff --git a/llvm/test/Transforms/ArgumentPromotion/opaque-ptr.ll b/llvm/test/Transforms/ArgumentPromotion/opaque-ptr.ll --- a/llvm/test/Transforms/ArgumentPromotion/opaque-ptr.ll +++ b/llvm/test/Transforms/ArgumentPromotion/opaque-ptr.ll @@ -1,11 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -S -argpromotion -opaque-pointers < %s | FileCheck %s +; RUN: opt -S -argpromotion -opaque-pointers < %s | FileCheck -check-prefixes=ALL,NO-MSSA %s +; RUN: opt -S -mssaargpromotion -opaque-pointers < %s | FileCheck -check-prefixes=ALL,MSSA --allow-unused-prefixes %s define internal i32 @callee_basic(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@callee_basic -; CHECK-SAME: (i32 [[P_0_VAL:%.*]], i32 [[P_4_VAL:%.*]]) { -; CHECK-NEXT: [[Z:%.*]] 
= add i32 [[P_0_VAL]], [[P_4_VAL]] -; CHECK-NEXT: ret i32 [[Z]] +; NO-MSSA-LABEL: define {{[^@]+}}@callee_basic +; NO-MSSA-SAME: (i32 [[P_0_VAL:%.*]], i32 [[P_4_VAL:%.*]]) { +; NO-MSSA-NEXT: [[Z:%.*]] = add i32 [[P_0_VAL]], [[P_4_VAL]] +; NO-MSSA-NEXT: ret i32 [[Z]] ; %x = load i32, ptr %p %p1 = getelementptr i8, ptr %p, i64 4 @@ -15,27 +16,50 @@ } define void @caller_basic(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@caller_basic -; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 4 -; CHECK-NEXT: [[P_VAL1:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @callee_basic(i32 [[P_VAL]], i32 [[P_VAL1]]) -; CHECK-NEXT: ret void +; NO-MSSA-LABEL: define {{[^@]+}}@caller_basic +; NO-MSSA-SAME: (ptr [[P:%.*]]) { +; NO-MSSA-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; NO-MSSA-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P]], i64 4 +; NO-MSSA-NEXT: [[P_VAL1:%.*]] = load i32, ptr [[TMP1]], align 4 +; NO-MSSA-NEXT: [[TMP2:%.*]] = call i32 @callee_basic(i32 [[P_VAL]], i32 [[P_VAL1]]) +; NO-MSSA-NEXT: ret void ; call i32 @callee_basic(ptr %p) ret void } +define internal i32 @callee_basic_nogeps(ptr %p) { +; ALL-LABEL: define {{[^@]+}}@callee_basic_nogeps +; ALL-SAME: (i32 [[P_0_VAL:%.*]]) { +; ALL-NEXT: [[Z:%.*]] = add i32 [[P_0_VAL]], [[P_0_VAL]] +; ALL-NEXT: ret i32 [[Z]] +; + %x = load i32, ptr %p + %y = load i32, ptr %p + %z = add i32 %x, %y + ret i32 %z +} + +define void @caller_basic_nogeps(ptr %p) { +; ALL-LABEL: define {{[^@]+}}@caller_basic_nogeps +; ALL-SAME: (ptr [[P:%.*]]) { +; ALL-NEXT: [[P_VAL:%.*]] = load i32, ptr [[P]], align 4 +; ALL-NEXT: [[TMP1:%.*]] = call i32 @callee_basic_nogeps(i32 [[P_VAL]]) +; ALL-NEXT: ret void +; + call i32 @callee_basic_nogeps(ptr %p) + ret void +} + ; Same offset is loaded with two different types: Don't promote. define internal i32 @callee_different_types(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@callee_different_types -; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4 -; CHECK-NEXT: [[Y_F:%.*]] = load float, ptr [[P]], align 4 -; CHECK-NEXT: [[Y:%.*]] = fptoui float [[Y_F]] to i32 -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] -; CHECK-NEXT: ret i32 [[Z]] +; ALL-LABEL: define {{[^@]+}}@callee_different_types +; ALL-SAME: (ptr [[P:%.*]]) { +; ALL-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4 +; ALL-NEXT: [[Y_F:%.*]] = load float, ptr [[P]], align 4 +; ALL-NEXT: [[Y:%.*]] = fptoui float [[Y_F]] to i32 +; ALL-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] +; ALL-NEXT: ret i32 [[Z]] ; %x = load i32, ptr %p %y.f = load float, ptr %p @@ -45,10 +69,10 @@ } define void @caller_different_types(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@caller_different_types -; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @callee_different_types(ptr [[P]]) -; CHECK-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@caller_different_types +; ALL-SAME: (ptr [[P:%.*]]) { +; ALL-NEXT: [[TMP1:%.*]] = call i32 @callee_different_types(ptr [[P]]) +; ALL-NEXT: ret void ; call i32 @callee_different_types(ptr %p) ret void @@ -56,13 +80,13 @@ ; The two loads overlap: Don't promote.
define internal i32 @callee_overlap(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@callee_overlap -; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4 -; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 2 -; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[P1]], align 4 -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] -; CHECK-NEXT: ret i32 [[Z]] +; ALL-LABEL: define {{[^@]+}}@callee_overlap +; ALL-SAME: (ptr [[P:%.*]]) { +; ALL-NEXT: [[X:%.*]] = load i32, ptr [[P]], align 4 +; ALL-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P]], i64 2 +; ALL-NEXT: [[Y:%.*]] = load i32, ptr [[P1]], align 4 +; ALL-NEXT: [[Z:%.*]] = add i32 [[X]], [[Y]] +; ALL-NEXT: ret i32 [[Z]] ; %x = load i32, ptr %p %p1 = getelementptr i8, ptr %p, i64 2 @@ -72,10 +96,10 @@ } define void @caller_overlap(ptr %p) { -; CHECK-LABEL: define {{[^@]+}}@caller_overlap -; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @callee_overlap(ptr [[P]]) -; CHECK-NEXT: ret void +; ALL-LABEL: define {{[^@]+}}@caller_overlap +; ALL-SAME: (ptr [[P:%.*]]) { +; ALL-NEXT: [[TMP1:%.*]] = call i32 @callee_overlap(ptr [[P]]) +; ALL-NEXT: ret void ; call i32 @callee_overlap(ptr %p) ret void