diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2307,6 +2307,21 @@ :ref:`stackmap entry <statepoint-stackmap-format>`. See the intrinsic description for further details. + +.. _nocapture_use: + +No Capture Use Operand Bundles +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A "nocapture_use" operand bundle indicates that the instruction uses the +operands of the operand bundle but does not capture them. It can be used in +combination with the ``!nocapture`` metadata on stores to express that a pointer +is stored into memory and passed to a function without being captured. That is, +neither the store of the pointer nor the use of the memory by the instruction +with the "nocapture_use" operand bundle captures it; the indirection via +memory is only needed for ABI reasons. + + .. _moduleasm: Module-Level Inline Assembly @@ -9460,6 +9475,16 @@ The optional ``!invariant.group`` metadata must reference a single metadata name ``<empty_node>``. See ``invariant.group`` metadata. +The optional ``!nocapture`` metadata must reference a single metadata name +``<empty_node>`` corresponding to a node with no entries. The existence of +``!nocapture`` metadata on the instruction tells the optimizer that the pointer +stored is not captured in the sense that all uses of the pointer are explicitly +marked otherwise and the storing can be ignored during capture analysis. +The ``!nocapture`` metadata can be used with the :ref:`"nocapture_use" operand +bundle <nocapture_use>` to indicate a store is a necessity of an ABI but the +user of the memory the pointer is stored into is using the pointer value without +capturing it.
+ Semantics: """""""""" diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -42,3 +42,4 @@ LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28) LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29) LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) +LLVM_FIXED_MD_KIND(MD_nocapture, "nocapture", 31) diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -331,10 +332,13 @@ break; case Instruction::Store: // Stored the pointer - conservatively assume it may be captured. - // Volatile stores make the address observable. + // Volatile stores make the address observable. We do however ignore + // stores with the !nocapture metadata as it guarantees the pointer + // is not captured (in any way). 
if (U->getOperandNo() == 0 || cast(I)->isVolatile()) - if (Tracker->captured(U)) - return; + if (!cast(I)->hasMetadata(LLVMContext::MD_nocapture)) + if (Tracker->captured(U)) + return; break; case Instruction::AtomicRMW: { // atomicrmw conceptually includes both a load and store from diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -428,6 +428,7 @@ void visitDereferenceableMetadata(Instruction &I, MDNode *MD); void visitProfMetadata(Instruction &I, MDNode *MD); void visitAnnotationMetadata(MDNode *Annotation); + void visitNocaptureMetadata(Instruction &I, MDNode *Nocapture); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); @@ -4288,6 +4289,13 @@ Assert(isa(Op.get()), "operands must be strings"); } +/// Verify that !nocapture is an empty tuple attached to a store instruction. +void Verifier::visitNocaptureMetadata(Instruction &I, MDNode *Nocapture) { + Assert(isa<MDTuple>(Nocapture), "nocapture must be a tuple"); + Assert(Nocapture->getNumOperands() == 0, "nocapture must have no operands"); + Assert(isa<StoreInst>(I), "nocapture must be attached to a store"); +} + /// verifyInstruction - Verify that an instruction is well formed.
/// void Verifier::visitInstruction(Instruction &I) { @@ -4451,6 +4459,9 @@ if (MDNode *Annotation = I.getMetadata(LLVMContext::MD_annotation)) visitAnnotationMetadata(Annotation); + if (MDNode *Nocapture = I.getMetadata(LLVMContext::MD_nocapture)) + visitNocaptureMetadata(I, Nocapture); + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { AssertDI(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N, AreDebugLocsAllowed::Yes); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -14,7 +14,10 @@ #include "llvm/Transforms/IPO/OpenMPOpt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" @@ -23,6 +26,10 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/IPO.h" @@ -506,6 +513,8 @@ if (PrintOpenMPKernels) printKernels(); + Changed |= introducNocaptureMetadata(); + Changed |= rewriteDeviceCodeStateMachine(); Changed |= runAttributor(); @@ -874,6 +883,124 @@ return Changed; } + /// Try to introduce !nocapture metadata for stores which won't cause pointers + /// to escape. For the runtime calls listed below, each pointer argument at the + /// given positions must be an alloca that is written by exactly one store and + /// otherwise only passed to that runtime call. If so, the stores are tagged + /// with !nocapture and the call is recreated with a "nocapture_use" operand + /// bundle listing the allocas. Returns true if any call was rewritten.
+ bool introducNocaptureMetadata() { + struct NoCaptureUseTy { + RuntimeFunction RF; + unsigned ArgumentNo0; + unsigned ArgumentNo1; + }; + + NoCaptureUseTy NoCaptureUses[] = { + {OMPRTL___kmpc_reduce, 4, unsigned(-1)}, + {OMPRTL___kmpc_reduce_nowait, 4, unsigned(-1)}, + {OMPRTL___tgt_target_data_begin_mapper, 3, 4}, + {OMPRTL___tgt_target_data_begin_nowait_mapper, 3, 4}, + {OMPRTL___tgt_target_data_begin_mapper_issue, 3, 4}, + {OMPRTL___tgt_target_data_begin_mapper_wait, 3, 4}, + {OMPRTL___tgt_target_data_end_mapper, 3, 4}, + {OMPRTL___tgt_target_data_end_nowait_mapper, 3, 4}, + {OMPRTL___tgt_target_data_update_mapper, 3, 4}, + {OMPRTL___tgt_target_data_update_nowait_mapper, 3, 4}, + }; + + bool Changed = false; + + SmallVector<Use *, 8> AIUses; + for (NoCaptureUseTy &NCU : NoCaptureUses) { + bool LocalChanged = false; + auto &RFI = OMPInfoCache.RFIs[NCU.RF]; + if (!RFI.Declaration) + continue; + + SmallVector<CallInst *, 8> OldCIs; + auto HandleNoCaptureUse = [&](Use &U, Function &Decl) { + auto *RTCall = getCallIfRegularCall(U, &RFI); + if (!RTCall) + return false; + + SmallVector<StoreInst *, 2> SIs; + bool OneCall = false; + SmallVector<Value *, 2> AIs; + for (unsigned ArgumentNo : {NCU.ArgumentNo0, NCU.ArgumentNo1}) { + if (ArgumentNo == unsigned(-1)) + continue; + OneCall = false; + Value *CallArg = RTCall->getArgOperand(ArgumentNo); + AllocaInst *AI = dyn_cast<AllocaInst>(CallArg->stripPointerCasts()); + // Only allocas can be analyzed; give up on any other pointer origin. + if (!AI) + return false; + AIUses.clear(); + for (Use &AIUse : AI->uses()) + AIUses.push_back(&AIUse); + while (!AIUses.empty()) { + Use *CurUse = AIUses.pop_back_val(); + if (isa<BitCastOperator>(CurUse->getUser()) || + isa<GEPOperator>(CurUse->getUser())) { + for (Use &UserUse : CurUse->getUser()->uses()) + AIUses.push_back(&UserUse); + continue; + } + if (auto *SI = dyn_cast<StoreInst>(CurUse->getUser())) { + // Give up if the use is not the pointer operand, i.e., the address + // itself is what gets stored. + if (CurUse->getOperandNo() != /* PointerOperandNo */ 1) + return false; + SIs.push_back(SI); + continue; + } + if (auto *CI = dyn_cast<CallInst>(CurUse->getUser())) { + // Give up on any call use other than the runtime call's argument.
+ if (OneCall || CurUse->getOperandNo() != ArgumentNo || CI != RTCall) + return false; + OneCall = true; + continue; + } + // Any other user, e.g., a load, could leak the stored pointer. + return false; + } + + AIs.push_back(AI); + if (!OneCall || SIs.size() != AIs.size()) + return false; + } + + for (auto *SI : SIs) + SI->setMetadata(LLVMContext::MD_nocapture, + MDNode::get(SI->getContext(), {})); + + SmallVector<OperandBundleDef, 1> Bundles; + RTCall->getOperandBundlesAsDefs(Bundles); + Bundles.push_back(OperandBundleDef("nocapture_use", AIs)); + + auto *NewCI = CallInst::Create(RTCall, Bundles, RTCall); + CGUpdater.replaceCallSite(*RTCall, *NewCI); + RTCall->replaceAllUsesWith(NewCI); + OldCIs.push_back(RTCall); + + LocalChanged = true; + return false; + }; + + RFI.foreachUse(SCC, HandleNoCaptureUse); + + if (LocalChanged) { + llvm::for_each(OldCIs, [](CallInst *CI) { CI->eraseFromParent(); }); + RFI.clearUsesMap(); + OMPInfoCache.collectUses(RFI, /*CollectStats*/ false); + Changed = true; + } + } + + return Changed; + } + /// Try to eliminate runtime calls by reusing existing ones. bool deduplicateRuntimeCalls() { bool Changed = false; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -517,6 +517,7 @@ case LLVMContext::MD_nontemporal: case LLVMContext::MD_mem_parallel_loop_access: case LLVMContext::MD_access_group: + case LLVMContext::MD_nocapture: // All of these directly apply. NewStore->setMetadata(ID, N); break;