Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -355,6 +355,7 @@ void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); +void initializePropagateAndConvertNoAliasLegacyPassPass(PassRegistry &); void initializePruneEHPass(PassRegistry&); void initializeRABasicPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -50,6 +50,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -217,6 +218,7 @@ (void) llvm::createSeparateConstOffsetFromGEPPass(); (void) llvm::createSpeculativeExecutionPass(); (void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass(); + (void) llvm::createPropagateAndConvertNoAliasPass(); (void) llvm::createRewriteSymbolsPass(); (void) llvm::createStraightLineStrengthReducePass(); (void) llvm::createMemDerefPrinter(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -434,6 +434,10 @@ // TargetTransformInfo::hasBranchDivergence() is true. 
FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass(); +// PropagateAndConvertNoAlias: move noalias intrinsics into a provenance of +// load/store instructions +FunctionPass *createPropagateAndConvertNoAliasPass(); + //===----------------------------------------------------------------------===// // // StraightLineStrengthReduce - This pass strength-reduces some certain Index: llvm/include/llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h @@ -0,0 +1,37 @@ +//===- PropagateAndConvertNoAlias.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This pass moves dependencies on llvm.noalias onto the ptr_provenance. +/// It also introduces and propagates provenance.noalias and noalias.arg.guard +/// intrinsics. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H +#define LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DominatorTree; + +class PropagateAndConvertNoAliasPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM + bool runImpl(Function &F, llvm::DominatorTree &DT); + +private: + bool doit(Function &F, llvm::DominatorTree &DT); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -168,6 +168,7 @@ #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" @@ -457,6 +458,10 @@ // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // Hoisting of scalars and load expressions. FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); @@ -530,6 +535,10 @@ // Delete small array after loop unroll. FPM.addPass(SROA()); + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // Specially optimize memory movement as it doesn't look like dataflow in SSA. 
FPM.addPass(MemCpyOptPass()); @@ -584,6 +593,11 @@ // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + if (EnableKnowledgeRetention) FPM.addPass(AssumeSimplifyPass()); @@ -688,6 +702,10 @@ // Delete small array after loop unroll. FPM.addPass(SROA()); + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // Eliminate redundancies. FPM.addPass(MergedLoadStoreMotionPass()); if (RunNewGVN) @@ -770,6 +788,11 @@ FunctionPassManager FPM; FPM.addPass(SROA()); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); @@ -944,6 +967,11 @@ EarlyFPM.addPass(SimplifyCFGPass()); EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + EarlyFPM.addPass(PropagateAndConvertNoAliasPass()); + EarlyFPM.addPass(LowerExpectIntrinsicPass()); if (PTO.Coroutines) EarlyFPM.addPass(CoroEarlyPass()); @@ -1523,6 +1551,10 @@ // Break up allocas FPM.addPass(SROA()); + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. 
FPM.addPass(TailCallElimPass()); Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -181,6 +181,7 @@ FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) +FUNCTION_PASS("convert-noalias", PropagateAndConvertNoAliasPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) FUNCTION_PASS("coro-early", CoroEarlyPass()) FUNCTION_PASS("coro-elide", CoroElidePass()) Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -308,6 +308,13 @@ FPM.add(createCFGSimplificationPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); + + // Propagate and Convert as early as possible. + // But do it after SROA! + FPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + FPM.add(createVerifierPass()); + FPM.add(createLowerExpectIntrinsicPass()); } @@ -338,6 +345,13 @@ MPM.add(createFunctionInliningPass(IP)); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); @@ -372,6 +386,12 @@ if (EnableKnowledgeRetention) MPM.add(createAssumeSimplifyPass()); + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! 
+ MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + if (OptLevel > 1) { if (EnableGVNHoist) MPM.add(createGVNHoistPass()); @@ -460,6 +480,12 @@ // opened up by them. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); + // Propagate and Convert as early as possible. + // But do it after SROA! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + if (OptLevel > 1) { MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); @@ -620,6 +646,12 @@ MPM.add(Inliner); Inliner = nullptr; RunInliner = true; + + // Propagate and Convert as early as possible. + // But do it after SROA! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); } // Infer attributes on declarations, call sites, arguments, etc. for an SCC. @@ -967,6 +999,12 @@ if (RunInliner) { PM.add(Inliner); Inliner = nullptr; + + // Propagate and Convert as early as possible. + // But do it after SROA! + PM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + PM.add(createVerifierPass()); } PM.add(createPruneEHPass()); // Remove dead EH info. @@ -1000,6 +1038,12 @@ // Break up allocas PM.add(createSROAPass()); + // Propagate and Convert as early as possible. + // But do it after SROA! + PM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + PM.add(createVerifierPass()); + // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. 
if (OptLevel > 1) Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -56,6 +56,7 @@ NewGVN.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp + PropagateAndConvertNoAlias.cpp Reassociate.cpp Reg2Mem.cpp RewriteStatepointsForGC.cpp Index: llvm/lib/Transforms/Scalar/PropagateAndConvertNoAlias.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/PropagateAndConvertNoAlias.cpp @@ -0,0 +1,1239 @@ +//===- PropagateAndConvertNoAlias.cpp ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves dependencies on llvm.noalias onto the ptr_provenance. +// It also introduces and propagates llvm.provenance.noalias and +// llvm.noalias.arg.guard intrinsics. +// +// It is best placed as early as possible, but after: SROA+EarlyCSE +// - SROA: SROA converts llvm.noalias.copy.guard into llvm.noalias +// - EarlyCSE helps in cleaning up some expressions, easing our work here. +// +// And after inlining: inlining can also expose new llvm.noalias intrinsics and +// extra information about the dependencies. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "convert-noalias" + +namespace { + +class PropagateAndConvertNoAliasLegacyPass : public FunctionPass { +public: + static char ID; + explicit PropagateAndConvertNoAliasLegacyPass() : FunctionPass(ID), Impl() { + initializePropagateAndConvertNoAliasLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Propagate and Convert Noalias intrinsics"; + } + +private: + PropagateAndConvertNoAliasPass Impl; +}; +} // namespace + +char PropagateAndConvertNoAliasLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PropagateAndConvertNoAliasLegacyPass, "convert-noalias", + "Propagate And Convert llvm.noalias intrinsics", false, + false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(PropagateAndConvertNoAliasLegacyPass, "convert-noalias", + "Propagate And Convert llvm.noalias intrinsics", false, + false) + +void PropagateAndConvertNoAliasLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addPreserved(); + // FIXME: not sure the CallGraphWrapperPass is needed. It ensures the same + // pass order is kept as if the PropagateAndConvertNoAlias pass was not there. 
+ AU.addPreserved(); + AU.addPreserved(); + AU.addRequiredTransitive(); +} + +bool PropagateAndConvertNoAliasLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + return Impl.runImpl(F, getAnalysis().getDomTree()); +} + +namespace llvm { + +bool PropagateAndConvertNoAliasPass::runImpl(Function &F, DominatorTree &DT) { + return doit(F, DT); +} + +FunctionPass *createPropagateAndConvertNoAliasPass() { + return new PropagateAndConvertNoAliasLegacyPass(); +} + +PreservedAnalyses +PropagateAndConvertNoAliasPass::run(Function &F, FunctionAnalysisManager &AM) { + bool Changed = runImpl(F, AM.getResult(F)); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + // FIXME: not sure this is valid: + //?? PA.preserve(); // See above + + return PA; +} + +typedef SmallVector ProvenanceWorklist; +typedef SmallVector DepsVector; +typedef std::map I2Deps; +typedef SmallPtrSet InstructionSet; +typedef SmallPtrSet BasicBlockSet; + +// Analyse and propagate the instructions that need provenances: +// - InstructionsForProvenance: instructions that need a provenance +// representation +// - at entry: (A) +// -- llvm.noalias -> llvm.provenance.noalias +// -- llvm.noalias.arg.guard a, prov_a -> prov_a +// +// - during propagation: (B) +// -- select a, b, c -> select a, prov_b, prov_c +// -- PHI a, b,... -> PHI prov_a, prov_b, ... +// +// - Handled: Instructions that have been investigated. The Deps side refers to +// the provenance dependency. 
(C) +// -- a nullptr indicates that the normal dependency must be used for that +// operand +// -- an I indicates that the provenance representation of I must be used for +// that operand +// +// The algorithm: +// - We start from the llvm.noalias and llvm.noalias.arg.guard instructions +// - We go over their users, and check if they are special or not +// -- special users need a provenance representation and are annotated as such +// in 'Handled' (non-empty Dep) +// -- normal instructions are a passthrough, and are annotated with an empty Dep +// in 'Handled' (I->{}) +// -- some instructions stop the recursion: +// --- ICmp +// --- first arg of select +// --- llvm.provenance.noalias, llvm.noalias +// +// After the analysis, 'Handled' contains an overview of all instructions that +// depend on (A) +// - those instructions that were seen, but ignored otherwise have no +// dependencies (I -> {} ) +// - instructions that refer to one or more provenances have explicit +// dependencies. (I -> { op0, op1, op2, ... 
}) +// -- if opX == nullptr -> not a real ptr_provenance dependency +// -- if opX == someI : +// ---- if someI points to an instruction in Handled, it must be one of the +// instructions that have a provenance representation +// ---- otherwise, it points to a not-handle plain dependency (coming from a +// noalias.arg.guard) +static void propagateInstructionsForProvenance( + ProvenanceWorklist &InstructionsForProvenance, I2Deps &Handled, + ProvenanceWorklist &out_CreationList, InstructionSet &ProvenancePHIs, + BasicBlockSet &DeadBasicBlocks) { + auto updateMatchingOperands = [](Instruction *U, Instruction *I, + DepsVector &Deps, Instruction *I4SC) { + assert(U->getNumOperands() == Deps.size()); + auto it = Deps.begin(); + for (Value *UOp : U->operands()) { + if (UOp == I) { + assert(*it == nullptr || *it == I4SC); + *it = I4SC; + } + ++it; + } + }; + + while (!InstructionsForProvenance.empty()) { + Instruction *I4SC = InstructionsForProvenance.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() + << "-- Propagating provenance instruction: " << *I4SC << "\n"); + if (DeadBasicBlocks.count(I4SC->getParent())) { + LLVM_DEBUG(llvm::dbgs() << "--- Skipped - dead basic block\n"); + continue; + } + SmallVector WorkList = {I4SC}; + if (auto *CB = dyn_cast(I4SC)) { + if (CB->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + // llvm.noalias.arg.guard: delegate to ptr_provenance (operand 1) + Handled.insert(I2Deps::value_type(I4SC, {})); + // no need to add to out_CreationList + + assert(!isa(I4SC->getOperand(0)) && + !isa(I4SC->getOperand(1)) && + "Degenerated case must have been resolved already"); + assert(I4SC->getOperand(0) != I4SC->getOperand(1) && + "Degenerated case must have been resolved already"); + + I4SC = dyn_cast(I4SC->getOperand(1)); + if (I4SC == nullptr) { + // Provenance became a constant ? 
Then the arg guard is not needed + // any more and there is nothing to propagate + continue; + } + } + } + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() << "-- checking:" << *I << "\n"); + if (DeadBasicBlocks.count(I->getParent())) { + LLVM_DEBUG(llvm::dbgs() << "--- skipped - dead basic block\n"); + continue; + } + bool isPtrToInt = isa(I); + for (auto &UOp : I->uses()) { + auto *U_ = UOp.getUser(); + LLVM_DEBUG(llvm::dbgs() << "--- used by:" << *U_ + << ", operand:" << UOp.getOperandNo() << "\n"); + Instruction *U = dyn_cast(U_); + if (U == nullptr) + continue; + + // Only see through a ptr2int if it used by a int2ptr + if (isPtrToInt && !isa(U)) + continue; + + if (isa(U)) { + // ======================================== select -> { lhs, rhs } + bool MatchesOp1 = (U->getOperand(1) == I); + bool MatchesOp2 = (U->getOperand(2) == I); + + if (MatchesOp1 || MatchesOp2) { + auto HI = Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp1) { + HI.first->second[0] = I4SC; + } + if (MatchesOp2) { + HI.first->second[1] = I4SC; + } + if (HI.second) { + InstructionsForProvenance.push_back(U); + } + } + } else if (isa(U)) { + // ======================================== load -> { ptr } + if (UOp.getOperandNo() == + LoadInst::getNoaliasProvenanceOperandIndex()) + continue; // tracking on provenance -> ignore + + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + assert(U->getOperand(0) == I); + if (HI.second) { + // continue + } + } else if (isa(U)) { + // ======================================== store -> { val, ptr } + if (UOp.getOperandNo() == + StoreInst::getNoaliasProvenanceOperandIndex()) + continue; // tracking on provenance -> ignore + + // also track if we are storing a restrict annotated pointer value... 
+ // This might provide useful information about 'escaping pointers' + bool MatchesOp0 = (U->getOperand(0) == I); + bool MatchesOp1 = (U->getOperand(1) == I); + + if (MatchesOp0 || MatchesOp1) { + auto HI = Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp0) { + HI.first->second[0] = I4SC; + } + if (MatchesOp1) { + HI.first->second[1] = I4SC; + } + } + } else if (isa(U)) { + // ======================================== insertvalue -> { val } + // track for injecting llvm.noalias.arg.guard + assert(U->getOperand(1) == I); + // need to introduce a guard + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + // ======================================== ptr2int -> { val } + // track for injecting llvm.noalias.arg.guard + assert(U->getOperand(0) == I); + // need to introduce a guard + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + // ======================================== PHI -> { ..... } + PHINode *PU = cast(U); + auto HI = Handled.insert(I2Deps::value_type(U, {})); + if (HI.second) { + HI.first->second.resize(U->getNumOperands(), nullptr); + if (ProvenancePHIs.count(U) == 0) { + // This is a normal PHI, consider it for propagation + InstructionsForProvenance.push_back(U); + } + if (U->getNumOperands()) + out_CreationList.push_back(U); + } + updateMatchingOperands(PU, I, HI.first->second, I4SC); + } else if (auto *CS = dyn_cast(U)) { + // =============================== call/invoke/intrinsic -> { ...... } + + // NOTES: + // - we always block at a call... 
+ // - the known intrinsics should not have any extra annotations + switch (CS->getIntrinsicID()) { + case Intrinsic::provenance_noalias: + case Intrinsic::noalias: { + bool MatchesOp0 = (U->getOperand(0) == I); + bool MatchesOpP = + (U->getOperand(Intrinsic::NoAliasIdentifyPArg) == I); + static_assert(Intrinsic::NoAliasIdentifyPArg == + Intrinsic::ProvenanceNoAliasIdentifyPArg, + "those must be identical"); + + if (MatchesOp0 || MatchesOpP) { + auto HI = + Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp0) { + HI.first->second[0] = I4SC; + } + if (MatchesOpP) { + HI.first->second[1] = I4SC; + } + } + continue; + } + case Intrinsic::noalias_arg_guard: { + // ignore - should be handled by the outer loop ! + continue; + } + + default: + break; + } + // if we get here, we need to inject guards for certain arguments. + // Track which arguments will need one. + auto HI = Handled.insert(I2Deps::value_type(U, {})); + if (HI.second) { + HI.first->second.resize(U->getNumOperands(), nullptr); + if (U->getNumOperands()) { + out_CreationList.push_back(U); + } + } + updateMatchingOperands(U, I, HI.first->second, I4SC); + if (I == CS->getReturnedArgOperand()) { + // also see through call - this does not omit the need of + // introducing a noalias_arg_guard + WorkList.push_back(U); + } + } else { + // ======================================== other -> {} + // this is the generic case... not sure if we should have a elaborate + // check for 'all other instructions'. 
just acknowledge that we saw it + // and propagate to any users + // - NOTE: if we happen have already handled it, this might indicate + // something interesting that we should handle separately + + switch (U->getOpcode()) { + case Instruction::ICmp: + // restrict pointer used in comparison - do not propagate + // provenance + continue; + default: + break; + } + + auto HI = Handled.insert(I2Deps::value_type(U, {})); + // No need to add to out_CreationList + if (!HI.second) { + llvm::errs() + << "WARNING: found an instruction that was already handled:" + << *U << "\n"; + assert(!HI.second && + "We should not encounter a handled instruction ??"); + } + + if (HI.second) { + WorkList.push_back(U); + } + } + } + } + } +} + +typedef SmallDenseMap, Value *, 16> + ValueType2CastMap; +static Value *createBitOrPointerOrAddrSpaceCast(Value *V, Type *T, + ValueType2CastMap &VT2C) { + if (V->getType() == T) + return V; + + // Make sure we remember what casts we introduced + Value *&Entry = VT2C[std::make_pair(V, T)]; + if (Entry == nullptr) { + Instruction *InsertionPoint = cast(V); + if (auto *PHI = dyn_cast(V)) { + InsertionPoint = PHI->getParent()->getFirstNonPHI(); + } else { + InsertionPoint = InsertionPoint->getNextNode(); + } + + IRBuilder<> Builder(InsertionPoint); + Entry = Builder.CreateBitOrPointerCast(V, T); + } + return Entry; +} + +static bool isValidProvenanceNoAliasInsertionPlace(IntrinsicInst *SNA, + Value *InsertionPointV, + DominatorTree &DT) { + assert(SNA->getIntrinsicID() == Intrinsic::provenance_noalias && + "Expect a provenance.noalias"); + Instruction *InsertionPointI = dyn_cast(InsertionPointV); + if (InsertionPointI == nullptr) + return false; + + auto isDominatingOn = [&](Value *Arg) { + auto *ArgI = dyn_cast(Arg); + if (ArgI == nullptr) + return true; + return DT.dominates(ArgI, InsertionPointI); + }; + + for (auto Op : {Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + Intrinsic::ProvenanceNoAliasIdentifyPArg, + 
Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg, + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, + Intrinsic::ProvenanceNoAliasScopeArg}) { + if (!isDominatingOn(SNA->getOperand(Op))) + return false; + } + + return true; +} + +// combine llvm.provenance.noalias intrinsics as much as possible +void collapseProvenanceNoAlias( + ProvenanceWorklist &CollapseableProvenanceNoAliasIntrinsics, + DominatorTree &DT) { + if (CollapseableProvenanceNoAliasIntrinsics.empty()) + return; + + if (!CollapseableProvenanceNoAliasIntrinsics.empty()) { + // sweep from back to front, then from front to back etc... until no + // modifications are done + do { + LLVM_DEBUG(llvm::dbgs() + << "- Trying to collapse llvm.provenance.noalias\n"); + ProvenanceWorklist NextList; + bool Changed = false; + + // 1) provenance.noaliasA (provenance.noaliasB (....), ...) -> + // provenance.noaliasB(...) + while (!CollapseableProvenanceNoAliasIntrinsics.empty()) { + IntrinsicInst *I = + cast(CollapseableProvenanceNoAliasIntrinsics.back()); + assert(I->getIntrinsicID() == Intrinsic::provenance_noalias); + + CollapseableProvenanceNoAliasIntrinsics.pop_back(); + + // provenance.noalias (provenance.noalias(....), .... ) -> + // provenance.noalias(....) + if (IntrinsicInst *DepI = dyn_cast(I->getOperand(0))) { + // Check if the depending intrinsic is compatible) + if (DepI->getIntrinsicID() == Intrinsic::provenance_noalias && + areProvenanceNoAliasCompatible(DepI, I)) { + // similar enough - look through + LLVM_DEBUG(llvm::dbgs() << "-- Collapsing(1):" << *I << "\n"); + I->replaceAllUsesWith(DepI); + I->eraseFromParent(); + Changed = true; + continue; + } + } + + if (PHINode *DepI = dyn_cast(I->getOperand(0))) { + //@ FIXME: TODO: make more general ? + // provenance.noalias(PHI (fum, self)) -> PHI(provenance.noalias(fum), + // phi self ref) + // - NOTE: only handle the 'simple' case for now ! At least that will + // be correct. 
+ if ((DepI->getNumIncomingValues() == 2) && + (DepI->getNumUses() == 1)) { + LLVM_DEBUG(llvm::dbgs() + << "--- Investigating interesting PHI depenceny\n"); + bool SelfDep0 = (DepI->getOperand(0) == I); + bool SelfDep1 = (DepI->getOperand(1) == I); + if (SelfDep0 || SelfDep1) { + LLVM_DEBUG(llvm::dbgs() << "---- has self dependency\n"); + unsigned ChannelToFollow = SelfDep0 ? 1 : 0; + // Try to find a possible insertion point + if (isValidProvenanceNoAliasInsertionPlace( + I, DepI->getOperand(ChannelToFollow), DT)) { + // create a new provenance.noalias at the insertion point + // FIXME: if DepDepI is not an instruction, we could take the + // end of the BB as insertion location ?? + LLVM_DEBUG(llvm::dbgs() << "----- Migrating !\n"); + Instruction *DepDepI = + cast(DepI->getOperand(ChannelToFollow)); + auto DepDepIIt = DepDepI->getIterator(); + if (isa(DepDepI)) { + DepDepIIt = DepDepI->getParent()->getFirstInsertionPt(); + } else { + ++DepDepIIt; + } + IRBuilder<> builder(DepDepI->getParent(), DepDepIIt); + + auto *NewSNA = builder.CreateProvenanceNoAliasPlain( + DepDepI, + I->getOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg), + I->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg), + I->getOperand( + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg), + I->getOperand( + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg), + I->getOperand(Intrinsic::ProvenanceNoAliasScopeArg)); + AAMDNodes Metadata; + I->getAAMetadata(Metadata); + NewSNA->setAAMetadata(Metadata); + I->replaceAllUsesWith(NewSNA); + I->eraseFromParent(); + Changed = true; + // And handle the new provenance.noalias for the next sweep + NextList.push_back(NewSNA); + continue; + } + } + } + } + + NextList.push_back(I); + } + + // 2) provenance.noaliasA (...), provenance.noaliasB(...) --> + // provenance.noaliasA(...) 
+ { + for (Instruction *I : NextList) { + IntrinsicInst *II = cast(I); + Instruction *DominatingUse = II; + + ProvenanceWorklist similarProvenances; + for (User *U : II->getOperand(0)->users()) { + if (IntrinsicInst *UII = dyn_cast(U)) { + if (UII->getParent() && // still valid - ignore already removed + // instructions + UII->getIntrinsicID() == Intrinsic::provenance_noalias && + areProvenanceNoAliasCompatible(II, UII)) { + similarProvenances.push_back(UII); + if (DT.dominates(UII, DominatingUse)) + DominatingUse = UII; + } + } + } + + for (Instruction *SI : similarProvenances) { + if ((SI != DominatingUse) && DT.dominates(DominatingUse, SI)) { + LLVM_DEBUG(llvm::dbgs() << "-- Collapsing(2):" << *SI << "\n"); + Changed = true; + SI->replaceAllUsesWith(DominatingUse); + SI->removeFromParent(); // do not yet erase ! + assert((std::find(NextList.begin(), NextList.end(), SI) != + NextList.end()) && + "Similar ptr_provenance must be on the NextList"); + } + } + } + + if (!Changed) + break; + + // Now eliminate all removed intrinsics + llvm::erase_if(NextList, [](Instruction *I) { + if (I->getParent()) { + return false; + } else { + I->deleteValue(); + return true; + } + }); + } + + CollapseableProvenanceNoAliasIntrinsics = NextList; + } while (CollapseableProvenanceNoAliasIntrinsics.size() > 1); + } +} + +// Look at users of llvm.provenance.noalias to find PHI nodes that are used for +// pointer provenance +static void +deduceProvenancePHIs(ProvenanceWorklist &ProvenanceNoAliasIntrinsics, + InstructionSet &out_ProvenancePHIs, + InstructionSet &out_NoAliasArgGuard, + BasicBlockSet &DeadBasicBlocks) { + LLVM_DEBUG(llvm::dbgs() << "-- Looking up ptr_provenance PHI nodes\n"); + for (Instruction *SNI : ProvenanceNoAliasIntrinsics) { + ProvenanceWorklist worklist = {SNI}; + while (!worklist.empty()) { + Instruction *worker = worklist.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() << "worker" << *worker << "\n"); + if (DeadBasicBlocks.count(worker->getParent())) + continue; // 
Degenerated llvm-ir; Skip + for (auto *SNIUser_ : worker->users()) { + Instruction *SNIUser = dyn_cast(SNIUser_); + if (SNIUser == nullptr) + continue; + + if (isa(SNIUser)) { + // Identify as a ptr_provenance PHI + if (out_ProvenancePHIs.insert(cast(SNIUser)).second) { + LLVM_DEBUG(llvm::dbgs() << "--- " << *SNIUser << "\n"); + // and propagate + worklist.push_back(SNIUser); + } + } else if (isa(SNIUser) || isa(SNIUser) || + isa(SNIUser) || + isa(SNIUser)) { + assert(SNIUser != worker && "not in ssa form ?"); + // look through select/bitcast/addressspacecast + worklist.push_back(SNIUser); + } else { + // load/store/provenance.noalias/arg.guard -> stop looking + if (auto *CB = dyn_cast(SNIUser)) { + auto CBIID = CB->getIntrinsicID(); + if (CBIID == Intrinsic::noalias_arg_guard) { + assert(CB->getOperand(1) == worker && + "a noalias.arg.guard provenance should be linked to " + "operand 1"); + out_NoAliasArgGuard.insert(CB); + } else if (CBIID == Intrinsic::provenance_noalias) { + // ok + } else { + LLVM_DEBUG(llvm::dbgs() + << "ERROR: unexpected call/intrinsic depending on " + "llvm.provenance.noalias:" + << *CB << "\n"); + assert(false && + "Unexpected llvm.provenance.noalias dependency (1)"); + } + } else { + if (isa(SNIUser) || isa(SNIUser)) { + // ok + } else { + LLVM_DEBUG(llvm::dbgs() + << "ERROR: unexpected instruction depending on " + "llvm.provenance.noalias:" + << *SNIUser << "\n"); + assert(false && + "Unexpected llvm.provenance.noalias dependency (2)"); + } + } + } + } + } + } +} + +static void RetrieveDeadBasicBlocks(Function &F, + BasicBlockSet &out_DeadBasicBlocks) { + df_iterator_default_set Reachable; + + // Mark all reachable blocks. 
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB /* Mark all reachable blocks */; + + for (auto &BB : F) { + if (!Reachable.count(&BB)) { + out_DeadBasicBlocks.insert(&BB); + LLVM_DEBUG(llvm::dbgs() << "- Unreachable BB:" << BB.getName() << "\n"); + } + } + + LLVM_DEBUG(llvm::dbgs() << "- There are " << out_DeadBasicBlocks.size() + << " unreachable BB on a total of " + << F.getBasicBlockList().size() << "\n"); +} + +void removeNoAliasIntrinsicsFromDeadBlocks(BasicBlockSet &DeadBlocks) { + LLVM_DEBUG(llvm::dbgs() << "- removing NoAlias intrinsics from " + << DeadBlocks.size() << " dead blocks\n"); + ProvenanceWorklist ToBeRemoved; + + for (auto *BB : DeadBlocks) { + for (auto &I : *BB) { + if (auto CB = dyn_cast(&I)) { + switch (CB->getIntrinsicID()) { + case Intrinsic::noalias: + case Intrinsic::noalias_decl: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: + ToBeRemoved.push_back(&I); + break; + default: + break; + } + } + } + } + + LLVM_DEBUG(llvm::dbgs() << "-- Removing " << ToBeRemoved.size() + << " intrinsics\n"); + for (auto *I : ToBeRemoved) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } +} + +bool PropagateAndConvertNoAliasPass::doit(Function &F, DominatorTree &DT) { + LLVM_DEBUG(llvm::dbgs() << "PropagateAndConvertNoAliasPass:\n"); + + // PHASE 0: find interesting instructions + // - Find all: + // -- Propagatable noalias intrinsics + // -- Load instructions + // -- Store instructions + ProvenanceWorklist InstructionsForProvenance; + ProvenanceWorklist LoadStoreIntrinsicInstructions; + ProvenanceWorklist LookThroughIntrinsics; + ProvenanceWorklist CollapseableProvenanceNoAliasIntrinsics; + ValueType2CastMap VT2C; + InstructionSet ProvenancePHIs; + ProvenanceWorklist DegeneratedNoAliasAndNoAliasArgGuards; + ProvenanceWorklist RemainingNoAliasArgGuards; + InstructionSet DecentNoAliasArgGuards; + + // Do not depend on simplifyCFG or 
eliminateDeadBlocks. Forcing any of them + // before the propagate can result in significant code degradations :( + // Live with the fact that we can observe degenerated llvm-ir. + BasicBlockSet DeadBasicBlocks; + RetrieveDeadBasicBlocks(F, DeadBasicBlocks); + + LLVM_DEBUG(llvm::dbgs() << "- gathering intrinsics, stores, loads:\n"); + for (auto &BB : F) { + if (DeadBasicBlocks.count(&BB)) + continue; // Skip dead basic blocks + + for (auto &I : BB) { + if (auto CB = dyn_cast(&I)) { + auto ID = CB->getIntrinsicID(); + if (ID == Intrinsic::noalias) { + LLVM_DEBUG(llvm::dbgs() << "-- found intrinsic:" << I << "\n"); + auto Op0 = I.getOperand(0); + if (isa(Op0)) { + LLVM_DEBUG(llvm::dbgs() << "--- degenerated\n"); + DegeneratedNoAliasAndNoAliasArgGuards.push_back(&I); + } else { + InstructionsForProvenance.push_back(&I); + LoadStoreIntrinsicInstructions.push_back(&I); + LookThroughIntrinsics.push_back(&I); + } + } else if (ID == Intrinsic::noalias_arg_guard) { + LLVM_DEBUG(llvm::dbgs() << "-- found intrinsic:" << I << "\n"); + auto Op0 = I.getOperand(0); + auto Op1 = I.getOperand(1); + if (isa(Op0) || isa(Op1) || (Op0 == Op1)) { + LLVM_DEBUG(llvm::dbgs() << "--- degenerated\n"); + DegeneratedNoAliasAndNoAliasArgGuards.push_back(&I); + } else { + RemainingNoAliasArgGuards.push_back(&I); + } + } else if (ID == Intrinsic::provenance_noalias) { + CollapseableProvenanceNoAliasIntrinsics.push_back(&I); + } + } else if (auto LI = dyn_cast(&I)) { + LLVM_DEBUG(llvm::dbgs() << "-- found load:" << I << "\n"); + LoadStoreIntrinsicInstructions.push_back(LI); + } else if (auto SI = dyn_cast(&I)) { + LLVM_DEBUG(llvm::dbgs() << "-- found store:" << I << "\n"); + LoadStoreIntrinsicInstructions.push_back(SI); + } + } + } + + // When there are no noalias related intrinsics, don't do anything. 
+ if (LookThroughIntrinsics.empty() && InstructionsForProvenance.empty() && + DegeneratedNoAliasAndNoAliasArgGuards.empty() && + CollapseableProvenanceNoAliasIntrinsics.empty() && + RemainingNoAliasArgGuards.empty()) { + LLVM_DEBUG(llvm::dbgs() << "- Nothing to do\n"); + return false; + } + + if (!DeadBasicBlocks.empty()) { + removeNoAliasIntrinsicsFromDeadBlocks(DeadBasicBlocks); + } + + LLVM_DEBUG( + llvm::dbgs() << "- Looking through degenerated llvm.noalias.arg.guard\n"); + for (Instruction *I : DegeneratedNoAliasAndNoAliasArgGuards) { + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } + + LLVM_DEBUG(llvm::dbgs() << "- Retrieving ptr_provenance PHI nodes and decent " + "llvm.noalias.arg.guard\n"); + deduceProvenancePHIs(CollapseableProvenanceNoAliasIntrinsics, ProvenancePHIs, + DecentNoAliasArgGuards, DeadBasicBlocks); + + LLVM_DEBUG( + llvm::dbgs() << "- looking through remaining llvm.noalias.arg.guard"); + for (Instruction *I : RemainingNoAliasArgGuards) { + if (DecentNoAliasArgGuards.find(I) != DecentNoAliasArgGuards.end()) { + InstructionsForProvenance.push_back(I); + LoadStoreIntrinsicInstructions.push_back(I); + LookThroughIntrinsics.push_back(I); + } else { + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } + } + + LLVM_DEBUG(llvm::dbgs() << "- Find out what to do:\n"); + + // PHASE 1: forward pass: + // - Start with all intrinsics + // -- Track all users + // -- Interesting users (noalias intrinsics, select, PHI, load/store) + // -- Do this recursively for users that we can look through + I2Deps Handled; // instruction -> { dependencies } + ProvenanceWorklist + CreationList; // Tracks all keys in Handled, but in a reproducable way + propagateInstructionsForProvenance(InstructionsForProvenance, Handled, + CreationList, ProvenancePHIs, + DeadBasicBlocks); + + // PHASE 2: add missing load/store/intrinsic instructions: + for (auto *I : LoadStoreIntrinsicInstructions) { + if (isa(I)) { + if 
(Handled.insert(I2Deps::value_type(I, {nullptr})).second) + CreationList.push_back(I); + } else { // Store or llvm.no_alias + if (Handled.insert(I2Deps::value_type(I, {nullptr, nullptr})).second) + CreationList.push_back(I); + } + } + +#if !defined(NDEBUG) + auto dumpit = [](I2Deps::value_type &H) { + auto &out = llvm::dbgs(); + out << *H.first << " -> {"; + bool comma = false; + for (auto D : H.second) { + if (comma) + out << ","; + comma = true; + if (D == nullptr) { + out << "nullptr"; + } else { + out << *D; + } + } + out << "}\n"; + }; +#endif + + // PHASE 3: reconstruct alternative tree + // - detected dependencies: replace them by new instructions + // - undetected dependencies: use the original dependency + // NOTE: See explanation in propagateInstructionsForProvenance for more + // information ! + LLVM_DEBUG(llvm::dbgs() << "- Reconstructing tree:\n"); + + ProvenanceWorklist UnresolvedPHI; + SmallDenseMap I2NewV; + SmallDenseMap I2ArgGuard; + + auto getNewIOrOperand = [&](Instruction *DepOp, Value *OrigOp) { + assert(((!DepOp) || I2NewV.count(DepOp)) && "DepOp should be known"); + return DepOp ? static_cast(I2NewV[DepOp]) : OrigOp; + }; + + // Helper lambda for inserting a new noalias.arg.guard + auto setNewNoaliasArgGuard = [&](Instruction *I, unsigned Index, + Instruction *DepOp) { + auto *ProvOp = cast(I2NewV[DepOp]); + // if we get here, the operand has to be an 'Instruction' + // (otherwise, DepOp would not be set). 
+ auto *OpI = cast(I->getOperand(Index)); + auto &ArgGuard = I2ArgGuard[OpI]; + if (ArgGuard == nullptr) { + // create the instruction close to the origin, so that we don't introduce + // bad dependencies + auto InsertionPointIt = OpI->getIterator(); + ++InsertionPointIt; + if (isa(OpI)) { + auto End = OpI->getParent()->end(); + while (InsertionPointIt != End) { + if (!isa(*InsertionPointIt)) + break; + ++InsertionPointIt; + } + } + IRBuilder<> BuilderForArgs(OpI->getParent(), InsertionPointIt); + ArgGuard = BuilderForArgs.CreateNoAliasArgGuard( + OpI, createBitOrPointerOrAddrSpaceCast(ProvOp, OpI->getType(), VT2C), + OpI->getName() + ".guard"); + } + I->setOperand(Index, ArgGuard); + }; + + // Map known provenance.noalias that are not handle to themselves + for (auto SNI : CollapseableProvenanceNoAliasIntrinsics) + if (Handled.find(SNI) == Handled.end()) + I2NewV[SNI] = SNI; + + // We are doing a number of sweeps. This should always end. Normally the + // amount of sweeps is low. During initial development, a number of bugs where + // found by putting a hard limit on the the amount. 
+ unsigned Watchdog = 1000000; // Only used in assertions build + (void)Watchdog; + for (auto CloneableInst : CreationList) { + assert(Handled.count(CloneableInst) && + "Entries in CreationList must also be in Handled"); + assert(!Handled[CloneableInst].empty() && + "Only non-empty items should be added to the CreationList"); + + LLVM_DEBUG(llvm::dbgs() << "- "; dumpit(*Handled.find(CloneableInst))); + ProvenanceWorklist Worklist = {CloneableInst}; + + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + + if (I2NewV.count(I)) { + // already exists - skip + Worklist.pop_back(); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << "-- Reconstructing:" << *I << "\n"); + + // Check if we have all the needed arguments + auto HandledIt = Handled.find(I); + if (HandledIt == Handled.end()) { + // This can happen after propagation of a llvm.noalias.arg.guard + Worklist.pop_back(); + I2NewV[I] = I; + LLVM_DEBUG(llvm::dbgs() << "--- Connected to an existing path!\n"); + continue; + } + + // If we are a PHI node, just create it + if (isa(I)) { + if (ProvenancePHIs.count(cast(I)) == 0) { + // But only if it is _not_ a ptr_provenance PHI node + // ======================================== PHI -> { ..... 
} + IRBuilder<> Builder(I); + I2NewV[I] = Builder.CreatePHI(I->getType(), I->getNumOperands(), + Twine("prov.") + I->getName()); + + UnresolvedPHI.push_back(I); + } else { + I2NewV[I] = I; // Map already existing Provenance PHI to itself + } + Worklist.pop_back(); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << "--- "; dumpit(*HandledIt)); + auto &Deps = HandledIt->second; + assert((!Deps.empty()) && + "Any creatable instruction must have some dependent operands"); + bool canCreateInstruction = true; + for (auto *DepOp : Deps) { + if (DepOp != nullptr) { + if (I2NewV.count(DepOp) == 0) { + canCreateInstruction = false; + Worklist.push_back(DepOp); + } + } + } +#if !defined(NDEBUG) + if (--Watchdog == 0) { + llvm::errs() + << "PropagateAndConvertNoAlias: ERROR: WATCHDOG TRIGGERED !\n"; + assert(false && "PropagateAndConvertNoAlias: WATCHDOG TRIGGERED"); + } +#endif + if (canCreateInstruction) { + Worklist.pop_back(); + IRBuilder<> Builder(I); + + if (isa(I)) { + // ======================================== select -> { lhs, rhs } + I2NewV[I] = Builder.CreateSelect( + I->getOperand(0), + createBitOrPointerOrAddrSpaceCast( + getNewIOrOperand(Deps[0], I->getOperand(1)), I->getType(), + VT2C), + createBitOrPointerOrAddrSpaceCast( + getNewIOrOperand(Deps[1], I->getOperand(2)), I->getType(), + VT2C), + Twine("prov.") + I->getName()); + } else if (isa(I)) { + // ======================================== load -> { ptr } + LoadInst *LI = cast(I); + + if (Deps[0]) { + if (!LI->hasNoaliasProvenanceOperand() || + isa(LI->getNoaliasProvenanceOperand()) || + (LI->getPointerOperand() == + LI->getNoaliasProvenanceOperand())) { + LI->setNoaliasProvenanceOperand(createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[0]], LI->getPointerOperandType(), VT2C)); + } else { + // nothing to do - propagation should have happend through the + // provenance ! 
+ // TODO: we might want to add an extra check that the load + // ptr_provenance was updated + } + } else { + // No extra dependency -> do nothing + // Note: originally we were adding a 'UndefValue' if there was no + // ptr_provenance. But that has the same effect as doing nothing. + } + I2NewV[I] = I; + } else if (isa(I)) { + // ======================================== store -> { val, ptr } + StoreInst *SI = cast(I); + + if (Deps[0]) { + // We try to store a restrict pointer - restrictness + Instruction *DepOp = Deps[0]; + setNewNoaliasArgGuard(I, 0, DepOp); + } + if (Deps[1]) { + if (!SI->hasNoaliasProvenanceOperand() || + isa(SI->getNoaliasProvenanceOperand()) || + (SI->getPointerOperand() == + SI->getNoaliasProvenanceOperand())) { + SI->setNoaliasProvenanceOperand(createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], SI->getPointerOperandType(), VT2C)); + } else { + // nothing to do - propagation should have happend through the + // provenance ! + // TODO: we might want to add an extra check that the store + // ptr_provenance was updated + } + } else { + // No extra dependency -> do nothing + // Note: originally we were adding a 'UndefValue' if there was no + // ptr_provenance. But that has the same effect as doing nothing. + } + I2NewV[I] = I; + } else if (isa(I)) { + // We try to insert a restrict pointer into a struct - track it. + // Track generated noalias_arg_guard also in I2NewI + assert(Deps.size() == 1 && + "InsertValue tracks exactly one dependency"); + Instruction *DepOp = Deps[0]; + setNewNoaliasArgGuard(I, 1, DepOp); + } else if (isa(I)) { + // We try to convert a restrict pointer to an integer - track it s + // SROA can produce this. + // Track generated noalias_arg_guard also in I2NewI + assert(Deps.size() == 1 && + "InsertValue tracks exactly one dependency"); + Instruction *DepOp = Deps[0]; + setNewNoaliasArgGuard(I, 0, DepOp); + } else { + // =============================== ret -> { ...... 
} + // =============================== call/invoke/intrinsic -> { ...... } + auto CB = dyn_cast(I); + if (CB) { + assert(CB && "If we get here, we should have a Call"); + switch (CB->getIntrinsicID()) { + case Intrinsic::noalias: { + // convert + assert(Deps.size() == 2); + Value *IdentifyPProvenance; + if (Deps[1]) { + // do the same as with the ptr_provenance in the load + // instruction + IdentifyPProvenance = createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], + I->getOperand(Intrinsic::NoAliasIdentifyPArg)->getType(), + VT2C); + } else { + IdentifyPProvenance = UndefValue::get( + I->getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()); + } + Instruction *NewI = Builder.CreateProvenanceNoAliasPlain( + getNewIOrOperand(Deps[0], I->getOperand(0)), + I->getOperand(Intrinsic::NoAliasNoAliasDeclArg), + I->getOperand(Intrinsic::NoAliasIdentifyPArg), + IdentifyPProvenance, + I->getOperand(Intrinsic::NoAliasIdentifyPObjIdArg), + I->getOperand(Intrinsic::NoAliasScopeArg)); + I2NewV[I] = NewI; + CollapseableProvenanceNoAliasIntrinsics.push_back(NewI); + + // Copy over metadata that is related to the 'getOperand(1)' (aka + // P) + AAMDNodes AAMetadata; + I->getAAMetadata(AAMetadata); + NewI->setAAMetadata(AAMetadata); + continue; + } + case Intrinsic::noalias_arg_guard: { + // no update needed - depending llvm.provenance.noalias/gep must + // have been updated + continue; + } + case Intrinsic::provenance_noalias: { + // update + assert((Deps[0] || Deps[1]) && + "provenance.noalias update needs a depending operand"); + if (Deps[0]) + I->setOperand(0, createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[0]], I->getType(), VT2C)); + if (Deps[1]) + I->setOperand( + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg, + createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], + I->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType(), + VT2C)); + I2NewV[I] = I; + continue; + } + default: + break; + } + } else { + assert(isa(I)); + } + + // Introduce a 
noalias_arg_guard for every argument that is + // annotated + assert(I->getNumOperands() == Deps.size()); + for (unsigned i = 0, ci = I->getNumOperands(); i < ci; ++i) { + Instruction *DepOp = Deps[i]; + if (DepOp) { + setNewNoaliasArgGuard(I, i, DepOp); + } + } + I2NewV[I] = I; + } + } + } + } + + // Phase 4: resolve the generated PHI nodes + LLVM_DEBUG(llvm::dbgs() << "- Resolving " << UnresolvedPHI.size() + << " PHI nodes\n"); + for (auto *PHI_ : ProvenancePHIs) { + PHINode *PHI = cast(PHI_); + auto it = Handled.find(PHI); + if (it != Handled.end()) { + LLVM_DEBUG(llvm::dbgs() << "-- Orig PHI:" << *PHI << "\n"); + auto &Deps = it->second; + for (unsigned i = 0, ci = Deps.size(); i < ci; ++i) { + LLVM_DEBUG(if (Deps[i]) llvm::dbgs() + << "--- UPDATING:Deps:" << *Deps[i] << "\n"); + Value *IncomingValue = Deps[i] ? I2NewV[Deps[i]] : nullptr; + if (IncomingValue) { + if (IncomingValue->getType() != PHI->getType()) { + IncomingValue = createBitOrPointerOrAddrSpaceCast( + IncomingValue, PHI->getType(), VT2C); + } + LLVM_DEBUG(llvm::dbgs() + << "--- IncomingValue:" << *IncomingValue << "\n"); + PHI->setIncomingValue(i, IncomingValue); + } + } + LLVM_DEBUG(llvm::dbgs() << "-- Adapted PHI:" << *PHI << "\n"); + } + } + + for (auto &PHI : UnresolvedPHI) { + PHINode *BasePHI = cast(PHI); + PHINode *NewPHI = cast(I2NewV[PHI]); + auto &Deps = Handled[PHI]; + + LLVM_DEBUG(llvm::dbgs() << "-- Orig PHI:" << *BasePHI << "\n"); + LLVM_DEBUG(llvm::dbgs() << "-- New PHI:" << *NewPHI << "\n"); + LLVM_DEBUG(llvm::dbgs() << "-- Deps: " << Deps.size() << "\n"); + for (unsigned i = 0, ci = BasePHI->getNumOperands(); i < ci; ++i) { + auto *BB = BasePHI->getIncomingBlock(i); + Value *IncomingValue = + Deps[i] ? I2NewV[Deps[i]] : BasePHI->getIncomingValue(i); + if (IncomingValue == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "--- hmm.. 
operand " << i << " became undef\n"); + IncomingValue = UndefValue::get(NewPHI->getType()); + } + if (IncomingValue->getType() != NewPHI->getType()) { + IncomingValue = createBitOrPointerOrAddrSpaceCast( + IncomingValue, NewPHI->getType(), VT2C); + } + NewPHI->addIncoming(IncomingValue, BB); + } + } + + // Phase 5: Removing the llvm.noalias + LLVM_DEBUG(llvm::dbgs() << "- Looking through intrinsics:\n"); + for (Instruction *I : LookThroughIntrinsics) { + auto CB = dyn_cast(I); + if (CB->getIntrinsicID() == Intrinsic::noalias || + CB->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + LLVM_DEBUG(llvm::dbgs() << "-- Eliminating: " << *I << "\n"); + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } else { + llvm_unreachable("unhandled lookthrough intrinsic"); + } + } + + // Phase 6: Collapse llvm.provenance.noalias where possible... + // - hmm: should we do this as a complete separate pass ?? + collapseProvenanceNoAlias(CollapseableProvenanceNoAliasIntrinsics, DT); + + return true; +} + +} // namespace llvm Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -89,6 +89,7 @@ initializeMergedLoadStoreMotionLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); + initializePropagateAndConvertNoAliasLegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); initializeRedundantDbgInstEliminationPass(Registry); initializeRegToMemPass(Registry); Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -46,6 +46,7 @@ ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: SROA ; GCN-O1-NEXT: Early CSE +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; 
GCN-O1-NEXT: Lower 'expect' Intrinsics ; GCN-O1-NEXT: Pass Arguments: @@ -96,6 +97,9 @@ ; GCN-O1-NEXT: Call Graph SCC Pass Manager ; GCN-O1-NEXT: Remove unused exception handling info ; GCN-O1-NEXT: AMDGPU Function Integration/Inlining +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Deduce function attributes ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Infer address spaces @@ -108,6 +112,7 @@ ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: Early CSE w/ MemorySSA +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Simplify the CFG ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) @@ -176,6 +181,7 @@ ; GCN-O1-NEXT: Lazy Block Frequency Analysis ; GCN-O1-NEXT: Optimization Remark Emitter ; GCN-O1-NEXT: Combine redundant instructions +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Aggressive Dead Code Elimination ; GCN-O1-NEXT: Simplify the CFG @@ -353,6 +359,7 @@ ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O2-NEXT: Lower 'expect' Intrinsics ; GCN-O2-NEXT: Pass Arguments: @@ -403,6 +410,9 @@ ; GCN-O2-NEXT: Call Graph SCC Pass Manager ; GCN-O2-NEXT: Remove unused exception handling info ; GCN-O2-NEXT: AMDGPU Function Integration/Inlining +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O2-NEXT: OpenMP specific optimizations ; GCN-O2-NEXT: Deduce function attributes ; GCN-O2-NEXT: FunctionPass Manager @@ -416,7 +426,9 @@ ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Early CSE w/ MemorySSA +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics 
; GCN-O2-NEXT: Speculatively execute instructions if target has divergent branches +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lazy Value Information Analysis ; GCN-O2-NEXT: Jump Threading @@ -506,6 +518,9 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Optimization Remark Emitter ; GCN-O2-NEXT: Combine redundant instructions +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lazy Value Information Analysis ; GCN-O2-NEXT: Jump Threading ; GCN-O2-NEXT: Value Propagation @@ -708,6 +723,7 @@ ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Early CSE +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O3-NEXT: Lower 'expect' Intrinsics ; GCN-O3-NEXT: Pass Arguments: @@ -761,6 +777,9 @@ ; GCN-O3-NEXT: Call Graph SCC Pass Manager ; GCN-O3-NEXT: Remove unused exception handling info ; GCN-O3-NEXT: AMDGPU Function Integration/Inlining +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O3-NEXT: OpenMP specific optimizations ; GCN-O3-NEXT: Deduce function attributes ; GCN-O3-NEXT: Promote 'by reference' arguments to scalars @@ -775,7 +794,9 @@ ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Early CSE w/ MemorySSA +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O3-NEXT: Speculatively execute instructions if target has divergent branches +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lazy Value Information Analysis ; GCN-O3-NEXT: Jump Threading @@ -866,6 +887,9 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Combine redundant instructions +; 
GCN-O3-NEXT: Propagate and Convert Noalias intrinsics +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lazy Value Information Analysis ; GCN-O3-NEXT: Jump Threading ; GCN-O3-NEXT: Value Propagation Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -104,6 +104,7 @@ ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. @@ -143,6 +144,7 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -188,6 +190,7 @@ ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVN ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -79,6 +79,7 @@ ; CHECK-O2-NEXT: Running pass: JumpThreadingPass ; CHECK-O2-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O2-NEXT: Running pass: SROA on foo +; CHECK-O2-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo ; CHECK-O2-NEXT: Finished llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: PostOrderFunctionAttrsPass Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -69,6 +69,7 @@ ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. @@ -108,6 +109,7 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -150,6 +152,7 @@ ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -39,6 +39,7 @@ ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. @@ -82,6 +83,7 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -123,6 +125,7 @@ ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -41,6 +41,7 @@ ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: InstCombinePass on foo @@ -90,6 +91,7 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -131,6 +133,7 @@ ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -43,6 +43,7 @@ ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. @@ -69,6 +70,7 @@ ; CHECK-O123-NEXT: Running pass: InlinerPass on (foo) ; CHECK-O123-NEXT: Running pass: SROA on foo ; CHECK-O123-NEXT: Running pass: EarlyCSEPass on foo +; CHECK-O123-NEXT: Running pass: PropagateAndConvertNoAliasPass on foo ; CHECK-O123-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O123-NEXT: Running pass: InstCombinePass on foo ; CHECK-O123-NEXT: Finished CGSCC pass manager run. @@ -123,6 +125,7 @@ ; CHECK-Os-NEXT: Running analysis: TargetIRAnalysis on foo ; CHECK-Oz-NEXT: Running analysis: TargetIRAnalysis on foo ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -173,6 +176,7 @@ ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -42,6 +42,7 @@ ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: InstCombinePass on foo @@ -89,6 +90,7 @@ ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -129,6 +131,7 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/opt-O2-pipeline.ll =================================================================== --- llvm/test/Other/opt-O2-pipeline.ll +++ llvm/test/Other/opt-O2-pipeline.ll @@ -17,6 +17,7 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -59,6 +60,9 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager @@ -68,7 +72,9 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -153,6 +159,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: 
llvm/test/Other/opt-O3-pipeline-enable-matrix.ll =================================================================== --- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -17,6 +17,7 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -62,6 +63,9 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars @@ -72,7 +76,9 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -158,6 +164,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/opt-O3-pipeline.ll =================================================================== --- llvm/test/Other/opt-O3-pipeline.ll +++ llvm/test/Other/opt-O3-pipeline.ll @@ -17,6 +17,7 @@ ; 
CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -62,6 +63,9 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars @@ -72,7 +76,9 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -158,6 +164,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/opt-Os-pipeline.ll =================================================================== --- llvm/test/Other/opt-Os-pipeline.ll +++ llvm/test/Other/opt-Os-pipeline.ll @@ -17,6 +17,7 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information 
@@ -59,6 +60,9 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager @@ -68,7 +72,9 @@ ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -139,6 +145,9 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/pass-pipelines.ll =================================================================== --- llvm/test/Other/pass-pipelines.ll +++ llvm/test/Other/pass-pipelines.ll @@ -46,6 +46,9 @@ ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info ; CHECK-O2-NEXT: Function Integration/Inlining +; CHECK-O2-NEXT: FunctionPass Manager +; CHECK-O2-NEXT: Dominator Tree Construction +; CHECK-O2-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-O2-NEXT: OpenMP specific optimizations ; CHECK-O2-NEXT: Deduce function attributes ; Next up is the main function pass pipeline. 
It shouldn't be split up and Index: llvm/test/Transforms/PropagateAndConvertNoAlias/basictest.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/basictest.ll @@ -0,0 +1,80 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +@gpA = common dso_local global i32* null, align 4 + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) #0 { +entry: + %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %pA = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx = getelementptr inbounds i32, i32* %pA, i32 10 + store i32 42, i32* %arrayidx, align 4, !tbaa !9, !noalias !2 + %pA.2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + %add.ptr = getelementptr inbounds i32, i32* %pA.2, i32 1 + %pA.3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32, i32* %pA.3, i32 11 + store i32 43, i32* %arrayidx1, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %0 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 10 +; CHECK-NEXT: store i32 42, i32* %arrayidx, ptr_provenance i32* %0, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %_pA, i32 1 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds i32, i32* %add.ptr, i32 
11 +; CHECK-NEXT: store i32 43, i32* %arrayidx1, ptr_provenance i32* %0, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local i32* @test02(i32* %_pA) #0 { +entry: + %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %pA.1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32* %pA.1, i32** @gpA, align 4, !tbaa !5, !noalias !11 + %pA.2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + ret i32* %pA.2 +} + +; CHECK-LABEL: @test02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %0 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: %pA.1.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %0) +; CHECK-NEXT: store i32* %pA.1.guard, i32** @gpA, align 4, !tbaa !5, !noalias !11 +; CHECK-NEXT: %pA.2.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %0) +; CHECK-NEXT: ret i32* %pA.2.guard +; CHECK-NEXT: } + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" 
"use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: pA"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test02: pA"} +!13 = distinct !{!13, !"test02"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/call.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/call.ll @@ -0,0 +1,112 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local i32* @passP(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %1, align 4, !tbaa !9, !noalias !2 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + ret i32* %2 +} + +; CHECK-LABEL: @passP( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa 
!9, !noalias !2 +; CHECK-NEXT: %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) +; CHECK-NEXT: ret i32* %.guard +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 41, i32* %1, align 4, !tbaa !9, !noalias !11 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + %call = call i32* @passP(i32* %2), !noalias !11 + store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) +; CHECK-NEXT: %call = call i32* @passP(i32* %.guard), !noalias !11 +; CHECK-NEXT: store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 41, i32* %1, align 4, !tbaa !9, !noalias !11 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + br label %block + +block: + %tmp0 = phi i32* [ %2, 
%entry ] + %tmp1 = phi i32* [ %1, %entry ] + %call = call i32* @passP(i32* %tmp0), !noalias !11 + store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 + ret void +} + + +; CHECK-LABEL: @test02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: br label %block +; CHECK: block: +; CHECK-NEXT: %prov.tmp0 = phi i32* [ %1, %entry ] +; CHECK-NEXT: %tmp0 = phi i32* [ %_pA, %entry ] +; CHECK-NEXT: %prov.tmp1 = phi i32* [ %1, %entry ] +; CHECK-NEXT: %tmp1 = phi i32* [ %_pA, %entry ] +; CHECK-NEXT: %tmp0.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %tmp0, i32* %prov.tmp0) +; CHECK-NEXT: %call = call i32* @passP(i32* %tmp0.guard), !noalias !11 +; CHECK-NEXT: store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { argmemonly nounwind speculatable } +attributes #4 = { nounwind readnone speculatable } 
+attributes #5 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"passP: pA"} +!4 = distinct !{!4, !"passP"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test01: p1"} +!13 = distinct !{!13, !"test01"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/degenerated.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/degenerated.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 + %.guard.guard.guard.guard.i = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %3) 
#1 + store i32 43, i32* %.guard.guard.guard.guard.i, ptr_provenance i32* undef, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #{{[0-9]+}}, !noalias !2 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %1, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #{{[0-9]+}}, !tbaa !5, !noalias !14 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 +; CHECK-NEXT: store i32 43, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 + %deg01 = call i32* 
@llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %.guard) #1 + %deg02 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* undef) #1 + call void @foo(i32* %deg01), !noalias !14 + call void @foo(i32* %deg01), !noalias !14 + ret void +} + +; CHECK-LABEL: @test02( +; CHECK: ret void + +%class.e = type { %class.a } +%class.a = type { i32 } + +@g = global %class.e zeroinitializer, align 4 + +; Function Attrs: nounwind +define internal fastcc void @test03() unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0s_class.es.i64(%class.e** null, i64 0, metadata !15) + %1 = tail call %class.e* @llvm.provenance.noalias.p0s_class.es.p0i8.p0p0s_class.es.p0p0s_class.es.i64(%class.e* nonnull @g, i8* %0, %class.e** null, %class.e** undef, i64 0, metadata !15) + %2 = getelementptr inbounds %class.e, %class.e* %1, i32 0, i32 0 + %.guard.guard = tail call %class.a* @llvm.noalias.arg.guard.p0s_class.as.p0s_class.as(%class.a* getelementptr inbounds (%class.e, %class.e* @g, i32 0, i32 0), %class.a* %2) + call void @foobar03(%class.a* %.guard.guard, i32 5), !noalias !15 + ret void +} + +; CHECK-LABEL: @test03( +; CHECK: call %class.a* @llvm.noalias.arg.guard.p0s_class.as.p0s_class.as +; CHECK: ret void + +; Function Attrs: nounwind +declare void @foobar03(%class.a*, i32) local_unnamed_addr #0 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0s_class.es.i64(%class.e**, i64, metadata) #1 + +; Function 
Attrs: nounwind readnone speculatable +declare %class.e* @llvm.provenance.noalias.p0s_class.es.p0i8.p0p0s_class.es.p0p0s_class.es.i64(%class.e*, i8*, %class.e**, %class.e**, i64, metadata) #3 + +; Function Attrs: nounwind readnone +declare %class.a* @llvm.noalias.arg.guard.p0s_class.as.p0s_class.as(%class.a*, %class.a*) #4 + +declare void @foo(i32*) + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: p1"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"passP: pA"} +!13 = distinct !{!13, !"passP"} +!14 = !{!12, !3} +!15 = !{!16} +!16 = distinct !{!16, !17, !"test03: %agg.result"} +!17 = distinct !{!17, !"test03"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/double_noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/double_noalias.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = 
"e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +; Function Attrs: nounwind +define dso_local void @test_rr(i32** %_p) #0 !noalias !2 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32*** null, i32 0, metadata !5) + %1 = call i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + %arrayidx = getelementptr inbounds i32*, i32** %1, i32 1 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* null, i32** %arrayidx, i32 0, metadata !2), !tbaa !7, !noalias !11 + %arrayidx1 = getelementptr inbounds i32, i32* %3, i32 2 + %4 = load i32, i32* %arrayidx1, align 4, !tbaa !12, !noalias !11 + %add = add nsw i32 %4, 1 + %5 = call i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + %6 = load i32*, i32** %5, align 4, !tbaa !7, !noalias !11 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %6, i8* null, i32** %5, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 %add, i32* %7, align 4, !tbaa !12, !noalias !11 + ret void +} + +; CHECK-LABEL: @test_rr( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32*** null, i32 0, metadata !5) +; CHECK-NEXT: %1 = call i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32*** undef, i32 0, metadata !5), !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_p, i32 1 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, ptr_provenance i32** %1, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** %arrayidx, i32** %1, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx1 = 
getelementptr inbounds i32, i32* %2, i32 2 +; CHECK-NEXT: %4 = load i32, i32* %arrayidx1, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %add = add nsw i32 %4, 1 +; CHECK-NEXT: %5 = load i32*, i32** %_p, ptr_provenance i32** %1, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %6 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %5, i8* null, i32** %_p, i32** %1, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: store i32 %add, i32* %5, ptr_provenance i32* %6, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32***, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32**, i8*, i32***, i32, metadata) #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind speculatable } +attributes #2 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_rr: unknown scope"} +!4 = distinct !{!4, !"test_rr"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_rr: rprp"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!6, !3} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} Index: 
llvm/test/Transforms/PropagateAndConvertNoAlias/inlined.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/inlined.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 + %.guard.guard.guard.guard.i = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %3) #1 + store i32 43, i32* %.guard.guard.guard.guard.i, ptr_provenance i32* undef, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, 
!noalias !2 +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #{{[0-9]+}}, !noalias !2 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %1, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #{{[0-9]+}}, !tbaa !5, !noalias !14 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 +; CHECK-NEXT: store i32 43, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: p1"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} 
+!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"passP: pA"} +!13 = distinct !{!13, !"passP"} +!14 = !{!12, !3} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/noalias_cleanup.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/noalias_cleanup.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo() local_unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %if.then, %entry + %prov.bar.0 = phi i32* [ undef, %entry ], [ %prov.bar.0, %if.then ] + br i1 undef, label %for.cond3thread-pre-split, label %if.then + +if.then: ; preds = %for.cond + %0 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %prov.bar.0, i8* undef, i32** null, i32** undef, i64 0, metadata !1) + br i1 undef, label %for.cond, label %for.body + +for.body: ; preds = %for.body, %if.then + %prov.bar.116 = phi i32* [ %1, %for.body ], [ %prov.bar.0, %if.then ] + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %prov.bar.116, i8* undef, i32** null, i32** undef, i64 0, metadata !1) + br label %for.body + +for.cond3thread-pre-split: ; preds = %for.cond + br label %for.body5 + +for.body5: ; preds = %for.body5, %for.cond3thread-pre-split + %prov.bar.220 = phi i32* [ %2, %for.body5 ], [ %prov.bar.0, %for.cond3thread-pre-split ] + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %prov.bar.220, i8* undef, i32** null, i32** undef, i64 0, metadata !1) + br label %for.body5 +} + +; CHECK-LABEL: @foo +; CHECK: call i32* @llvm.provenance.noalias +; CHECK-NOT: call i32* @llvm.provenance.noalias + +; Function Attrs: nounwind readnone speculatable 
+declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #1 + +attributes #0 = { "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } + +!llvm.ident = !{!0} + +!0 = !{!"clang)"} +!1 = !{!2} +!2 = distinct !{!2, !3, !"foo: bar"} +!3 = distinct !{!3, !"foo"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/reduced01.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/reduced01.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +; RUN: opt < %s -convert-noalias -verify -convert-noalias -verify -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +%struct.a = type { i8 } + +; Function Attrs: noreturn +define dso_local void @_Z3fooPii(i32* %_f, i32 %g) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_f, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %2 = bitcast i32* %1 to %struct.a* + %3 = bitcast i32* %_f to %struct.a* + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %prov.h.0 = phi %struct.a* [ %2, %entry ], [ %h.0.guard, %for.cond ] + %h.0 = phi %struct.a* [ %3, %entry ], [ %h.0.guard, %for.cond ] + %h.0.guard = call %struct.a* @llvm.noalias.arg.guard.p0s_struct.as.p0s_struct.as(%struct.a* %h.0, %struct.a* %prov.h.0) + %4 = getelementptr inbounds %struct.a, %struct.a* %h.0, i32 0, i32 0 + %.unpack = load i8, i8* %4, ptr_provenance %struct.a* %prov.h.0, align 1, !noalias !2 + %5 = insertvalue %struct.a undef, i8 %.unpack, 0 + %call = call i32 @_Z1b1a(%struct.a %5), !noalias !2 + br 
label %for.cond +} +; CHECK: define dso_local void @_Z3fooPii(i32* %_f, i32 %g) local_unnamed_addr #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_f, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: %2 = bitcast i32* %1 to %struct.a* +; CHECK-NEXT: %3 = bitcast i32* %_f to %struct.a* +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: ; preds = %for.cond, %entry +; CHECK-NEXT: %prov.h.0 = phi %struct.a* [ %2, %entry ], [ %prov.h.0, %for.cond ] +; CHECK-NEXT: %prov.h.01 = phi %struct.a* [ %3, %entry ], [ %prov.h.0, %for.cond ] +; CHECK-NEXT: %h.0 = phi %struct.a* [ %3, %entry ], [ %h.0, %for.cond ] +; CHECK-NEXT: %4 = getelementptr inbounds %struct.a, %struct.a* %h.0, i32 0, i32 0 +; CHECK-NEXT: %.unpack = load i8, i8* %4, ptr_provenance %struct.a* %prov.h.0, align 1, !noalias !2 +; CHECK-NEXT: %5 = insertvalue %struct.a undef, i8 %.unpack, 0 +; CHECK-NEXT: %call = call i32 @_Z1b1a(%struct.a %5), !noalias !2 +; CHECK-NEXT: br label %for.cond + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +declare dso_local i32 @_Z1b1a(%struct.a) local_unnamed_addr #2 + +; Function Attrs: nounwind readnone +declare %struct.a* @llvm.noalias.arg.guard.p0s_struct.as.p0s_struct.as(%struct.a*, %struct.a*) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #4 + +attributes #0 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 7.0.0 "} +!2 = !{!3} +!3 = distinct !{!3, !4, !"_Z3fooPii: f"} +!4 = distinct !{!4, !"_Z3fooPii"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/select_and_phi.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/select_and_phi.ll @@ -0,0 +1,208 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +; RUN: opt < %s -convert-noalias -verify -convert-noalias -verify -S | FileCheck %s + +; Function Attrs: nounwind +define dso_local void @test_phi01(i32* %_pA, i32** %_pB, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !11 + 
%arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !11 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] + %pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 99, i32* %6, align 4, !tbaa !12, !noalias !11 + store i32 42, i32* %pTmp00.0, align 4, !tbaa !12, !noalias !11 + %cmp5 = icmp sgt i32 %n, 5 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32 0, metadata !5) + %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %7 + store i32 43, i32* %cond, align 4, !tbaa !12, !noalias !11 + ret void + +for.body: ; preds = %for.cond + %arrayidx3 = getelementptr inbounds i32, i32* %pTmp01.0, i32 1 + %8 = load i32, i32* %arrayidx3, align 4, !tbaa !12, !noalias !11 + %arrayidx4 = getelementptr inbounds i32, i32* %pTmp00.0, i32 1 + store i32 %8, i32* %arrayidx4, align 4, !tbaa !12, !noalias !11 + %inc = add nsw i32 %i.0, 1 + br label %for.cond +} + +; CHECK-LABEL: @test_phi01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !11 +; 
CHECK-NEXT: %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 +; CHECK-NEXT: %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: +; CHECK-NEXT: %prov.pTmp00.0 = phi i32* [ %4, %entry ], [ %prov.pTmp01.0, %for.body ] +; CHECK-NEXT: %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] +; CHECK-NEXT: %prov.pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] +; CHECK-NEXT: %pTmp01.0 = phi i32* [ %5, %entry ], [ %_pA, %for.body ] +; CHECK-NEXT: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +; CHECK-NEXT: %cmp = icmp slt i32 %i.0, %n +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: for.cond.cleanup: +; CHECK-NEXT: store i32 99, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: store i32 42, i32* %pTmp00.0, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %cmp5 = icmp sgt i32 %n, 5 +; CHECK-NEXT: %6 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32** undef, i32 0, metadata !5) +; CHECK-NEXT: %prov.cond = select i1 %cmp5, i32* %prov.pTmp00.0, i32* %6 +; CHECK-NEXT: %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %2 +; CHECK-NEXT: store i32 43, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %pTmp01.0, i32 1 +; CHECK-NEXT: %7 = load i32, i32* %arrayidx3, ptr_provenance i32* %prov.pTmp01.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %pTmp00.0, i32 1 +; CHECK-NEXT: store i32 %7, i32* %arrayidx4, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 +; CHECK-NEXT: br label %for.cond +; CHECK-NEXT: } + + +; Function Attrs: nounwind +define dso_local void @test_phi02(i32* %_pA, i32** %_pB, i32 %n) #0 { 
+entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) + %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !19 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !14), !tbaa !7, !noalias !19 + %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !19 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !19 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] + %pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !14), !tbaa !7, !noalias !19 + %7 = bitcast i32* %6 to i16* + store i16 99, i16* %7, align 2, !tbaa !20, !noalias !19 + %8 = bitcast i32* %pTmp00.0 to i16* + store i16 42, i16* %8, align 2, !tbaa !20, !noalias !19 + %cmp5 = icmp sgt i32 %n, 5 + %9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32 0, metadata !17) + %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %9 + %10 = bitcast i32* %cond to i16* + store i16 43, i16* %10, align 2, !tbaa !20, !noalias !19 + ret void + +for.body: ; preds = %for.cond + %11 = bitcast i32* %pTmp01.0 to i16* + %arrayidx3 = getelementptr inbounds i16, i16* %11, i32 1 + %12 = load i16, i16* %arrayidx3, align 2, !tbaa !20, !noalias !19 + %13 = bitcast i32* %pTmp00.0 to i16* + %arrayidx4 = getelementptr inbounds i16, i16* %13, i32 1 + store i16 %12, i16* %arrayidx4, align 2, !tbaa !20, !noalias !19 + %inc = add nsw i32 %i.0, 1 + br label 
%for.cond +} + +; CHECK-LABEL: @test_phi02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !14), !tbaa !7, !noalias !19 +; CHECK-NEXT: %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 +; CHECK-NEXT: %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: +; CHECK-NEXT: %prov.pTmp00.0 = phi i32* [ %4, %entry ], [ %prov.pTmp01.0, %for.body ] +; CHECK-NEXT: %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] +; CHECK-NEXT: %prov.pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] +; CHECK-NEXT: %pTmp01.0 = phi i32* [ %5, %entry ], [ %_pA, %for.body ] +; CHECK-NEXT: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +; CHECK-NEXT: %6 = bitcast i32* %prov.pTmp00.0 to i16* +; CHECK-NEXT: %7 = bitcast i32* %prov.pTmp01.0 to i16* +; CHECK-NEXT: %cmp = icmp slt i32 %i.0, %n +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: for.cond.cleanup: +; CHECK-NEXT: %8 = bitcast i32* %3 to i16* +; CHECK-NEXT: %9 = bitcast i32* %_pA to i16* +; CHECK-NEXT: store i16 99, i16* %9, ptr_provenance i16* %8, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %10 = bitcast i32* %pTmp00.0 to i16* +; CHECK-NEXT: store i16 42, i16* %10, ptr_provenance i16* %6, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %cmp5 = icmp sgt i32 %n, 5 +; CHECK-NEXT: %11 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** null, 
i32** undef, i32 0, metadata !17) +; CHECK-NEXT: %prov.cond = select i1 %cmp5, i32* %prov.pTmp00.0, i32* %11 +; CHECK-NEXT: %12 = bitcast i32* %prov.cond to i16* +; CHECK-NEXT: %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %2 +; CHECK-NEXT: %13 = bitcast i32* %cond to i16* +; CHECK-NEXT: store i16 43, i16* %13, ptr_provenance i16* %12, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: %14 = bitcast i32* %pTmp01.0 to i16* +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i16, i16* %14, i32 1 +; CHECK-NEXT: %15 = load i16, i16* %arrayidx3, ptr_provenance i16* %7, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %16 = bitcast i32* %pTmp00.0 to i16* +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i16, i16* %16, i32 1 +; CHECK-NEXT: store i16 %15, i16* %arrayidx4, ptr_provenance i16* %6, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 +; CHECK-NEXT: br label %for.cond +; CHECK-NEXT: } + + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_phi01: rpTmp"} +!4 = distinct !{!4, !"test_phi01"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_phi01: rp2"} +!7 = 
!{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_phi02: rpTmp"} +!16 = distinct !{!16, !"test_phi02"} +!17 = !{!18} +!18 = distinct !{!18, !16, !"test_phi02: rp2"} +!19 = !{!15, !18} +!20 = !{!21, !21, i64 0, i64 2} +!21 = !{!9, i64 2, !"short"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/struct.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/struct.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_rFOO(%struct.FOO* %_pFOO, i32* %_pA) #0 !noalias !2 { +entry: + %tmp = alloca %struct.FOO, align 4 + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO** null, i32 0, metadata !5) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !7) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 4, metadata !7) + %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !7) + %4 = call %struct.FOO* @llvm.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO* %_pFOO, i8* %0, %struct.FOO** null, i32 0, metadata !5), !tbaa !9, !noalias !13 + %5 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %4, i8* null, metadata !14, metadata !2) + %6 = load %struct.FOO, %struct.FOO* %5, align 4, !noalias !13 + %.fca.0.extract = extractvalue %struct.FOO %6, 0 + %.fca.1.extract = extractvalue %struct.FOO %6, 1 + 
%.fca.2.extract = extractvalue %struct.FOO %6, 2 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.0.extract, i8* %1, i32** null, i32 0, metadata !7), !tbaa !18, !noalias !13 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32 4, metadata !7), !tbaa !20, !noalias !13 + %9 = load i32, i32* %8, align 4, !tbaa !21, !noalias !13 + store i32 %9, i32* %7, align 4, !tbaa !21, !noalias !13 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32 0, metadata !7), !tbaa !18, !noalias !13 + store i32 42, i32* %10, align 4, !tbaa !21, !noalias !13 + %.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32 0, metadata !7) + %.fca.0.insert = insertvalue %struct.FOO undef, i32* %.fca.0.load.noalias, 0 + %.fca.1.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32 4, metadata !7) + %.fca.1.insert = insertvalue %struct.FOO %.fca.0.insert, i32* %.fca.1.load.noalias, 1 + %.fca.2.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.2.extract, i8* %3, i32** null, i32 8, metadata !7) + %.fca.2.insert = insertvalue %struct.FOO %.fca.1.insert, i32* %.fca.2.load.noalias, 2 + call void @fum(%struct.FOO %.fca.2.insert), !noalias !13 + ret void +} + +; CHECK-LABEL: @test_rFOO( +; CHECK-NEXT: entry: +; CHECK-NEXT: %tmp = alloca %struct.FOO, align 4 +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO** null, i32 0, metadata !5) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !7) +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 4, metadata !7) +; CHECK-NEXT: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !7) +; CHECK-NEXT: %4 = call %struct.FOO* 
@llvm.provenance.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.p0p0s_struct.FOOs.i32(%struct.FOO* %_pFOO, i8* %0, %struct.FOO** null, %struct.FOO** undef, i32 0, metadata !5), !tbaa !9, !noalias !13 +; CHECK-NEXT: %.guard = call %struct.FOO* @llvm.noalias.arg.guard.p0s_struct.FOOs.p0s_struct.FOOs(%struct.FOO* %_pFOO, %struct.FOO* %4) +; CHECK-NEXT: %5 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %.guard, i8* null, metadata !14, metadata !2) +; CHECK-NEXT: %6 = load %struct.FOO, %struct.FOO* %5, align 4, !noalias !13 +; CHECK-NEXT: %.fca.0.extract = extractvalue %struct.FOO %6, 0 +; CHECK-NEXT: %.fca.1.extract = extractvalue %struct.FOO %6, 1 +; CHECK-NEXT: %.fca.2.extract = extractvalue %struct.FOO %6, 2 +; CHECK-NEXT: %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.0.extract, i8* %1, i32** null, i32** undef, i32 0, metadata !7), !tbaa !18, !noalias !13 +; CHECK-NEXT: %8 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32** undef, i32 4, metadata !7), !tbaa !20, !noalias !13 +; CHECK-NEXT: %9 = load i32, i32* %.fca.1.extract, ptr_provenance i32* %8, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: store i32 %9, i32* %.fca.0.extract, ptr_provenance i32* %7, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: %10 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32** undef, i32 0, metadata !7), !tbaa !18, !noalias !13 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %10, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: %.fca.0.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %10) +; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.FOO undef, i32* %.fca.0.load.noalias.guard, 0 +; CHECK-NEXT: %.fca.1.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.fca.1.extract, i32* %8) +; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.FOO 
%.fca.0.insert, i32* %.fca.1.load.noalias.guard, 1 +; CHECK-NEXT: %11 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.2.extract, i8* %3, i32** null, i32** undef, i32 8, metadata !7) +; CHECK-NEXT: %.fca.2.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.fca.2.extract, i32* %11) +; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.FOO %.fca.1.insert, i32* %.fca.2.load.noalias.guard, 2 +; CHECK-NEXT: call void @fum(%struct.FOO %.fca.2.insert), !noalias !13 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; Function Attrs: argmemonly nounwind speculatable +declare %struct.FOO* @llvm.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO*, i8*, %struct.FOO**, i32, metadata) #1 + +; Function Attrs: nounwind readnone +declare %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO*, i8*, metadata, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #3 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO**, i32, metadata) #3 + +declare dso_local void @fum(%struct.FOO) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind speculatable } +attributes #2 = { nounwind readnone } +attributes #3 = { argmemonly nounwind } +attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" 
"less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_rFOO: unknown scope"} +!4 = distinct !{!4, !"test_rFOO"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_rFOO: rpFOO"} +!7 = !{!8} +!8 = distinct !{!8, !4, !"test_rFOO: tmp"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!11, i64 4, !"any pointer"} +!11 = !{!12, i64 1, !"omnipotent char"} +!12 = !{!"Simple C/C++ TBAA"} +!13 = !{!6, !8, !3} +!14 = !{!15, !16, !17} +!15 = !{i64 -1, i64 0} +!16 = !{i64 -1, i64 1} +!17 = !{i64 -1, i64 2} +!18 = !{!19, !10, i64 0, i64 4} +!19 = !{!11, i64 12, !"FOO", !10, i64 0, i64 4, !10, i64 4, i64 4, !10, i64 8, i64 4} +!20 = !{!19, !10, i64 4, i64 4} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!11, i64 4, !"int"}