Diff 557687

llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h

This file was added.

				//===--------- HipStdPar.h - Standard Parallelism passes --------*- C++ -*-===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				/// \file
				///
				/// AcceleratorCodeSelection - Identify all functions reachable from a kernel,
				/// removing those that are unreachable.
				///
				/// AllocationInterposition - Forward calls to allocation / deallocation
				/// functions to runtime provided equivalents that allocate memory that is
				/// accessible for an accelerator.
				//===----------------------------------------------------------------------===//

				#ifndef LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
				#define LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H

				#include "llvm/IR/PassManager.h"

				namespace llvm {

				// Only Module needs a forward declaration here. ModuleAnalysisManager is a
				// type alias supplied by llvm/IR/PassManager.h (included above), so it must
				// not be re-declared as a class.
				class Module;

				/// Module pass that keeps only the code an accelerator is expected to execute:
				/// it identifies all functions reachable from a kernel and removes those that
				/// are unreachable.
				class HipStdParAcceleratorCodeSelectionPass
				: public PassInfoMixin<HipStdParAcceleratorCodeSelectionPass> {
				public:
				/// Runs the selection over \p M, using \p MAM to obtain module analyses.
				PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);

				/// Always true: the pass declares itself as required.
				static bool isRequired() { return true; }
				};

				/// Module pass that forwards calls to allocation / deallocation functions to
				/// runtime provided equivalents that allocate memory accessible for an
				/// accelerator.
				class HipStdParAllocationInterpositionPass
				: public PassInfoMixin<HipStdParAllocationInterpositionPass> {
				public:
				/// Runs the interposition over \p M; \p MAM is the module analysis manager.
				PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);

				/// Always true: the pass declares itself as required.
				static bool isRequired() { return true; }
				};

				} // namespace llvm

				#endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H

llvm/lib/Passes/CMakeLists.txt

Show All 13 Lines	add_llvm_component_library(LLVMPasses
intrinsics_gen		intrinsics_gen

LINK_COMPONENTS		LINK_COMPONENTS
AggressiveInstCombine		AggressiveInstCombine
Analysis		Analysis
CodeGen		CodeGen
Core		Core
Coroutines		Coroutines
		HipStdPar
IPO		IPO
InstCombine		InstCombine
IRPrinter		IRPrinter
ObjCARC		ObjCARC
Scalar		Scalar
Support		Support
Target		Target
TransformUtils		TransformUtils
Vectorize		Vectorize
Instrumentation		Instrumentation
)		)

llvm/lib/Passes/PassBuilder.cpp

	Show First 20 Lines • Show All 88 Lines • ▼ Show 20 Lines
	#include "llvm/Support/Regex.h"			#include "llvm/Support/Regex.h"
	#include "llvm/Target/TargetMachine.h"			#include "llvm/Target/TargetMachine.h"
	#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"			#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
	#include "llvm/Transforms/Coroutines/CoroCleanup.h"			#include "llvm/Transforms/Coroutines/CoroCleanup.h"
	#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"			#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
	#include "llvm/Transforms/Coroutines/CoroEarly.h"			#include "llvm/Transforms/Coroutines/CoroEarly.h"
	#include "llvm/Transforms/Coroutines/CoroElide.h"			#include "llvm/Transforms/Coroutines/CoroElide.h"
	#include "llvm/Transforms/Coroutines/CoroSplit.h"			#include "llvm/Transforms/Coroutines/CoroSplit.h"
				#include "llvm/Transforms/HipStdPar/HipStdPar.h"
	#include "llvm/Transforms/IPO/AlwaysInliner.h"			#include "llvm/Transforms/IPO/AlwaysInliner.h"
	#include "llvm/Transforms/IPO/Annotation2Metadata.h"			#include "llvm/Transforms/IPO/Annotation2Metadata.h"
	#include "llvm/Transforms/IPO/ArgumentPromotion.h"			#include "llvm/Transforms/IPO/ArgumentPromotion.h"
	#include "llvm/Transforms/IPO/Attributor.h"			#include "llvm/Transforms/IPO/Attributor.h"
	#include "llvm/Transforms/IPO/BlockExtractor.h"			#include "llvm/Transforms/IPO/BlockExtractor.h"
	#include "llvm/Transforms/IPO/CalledValuePropagation.h"			#include "llvm/Transforms/IPO/CalledValuePropagation.h"
	#include "llvm/Transforms/IPO/ConstantMerge.h"			#include "llvm/Transforms/IPO/ConstantMerge.h"
	#include "llvm/Transforms/IPO/CrossDSOCFI.h"			#include "llvm/Transforms/IPO/CrossDSOCFI.h"
	▲ Show 20 Lines • Show All 1,996 Lines • Show Last 20 Lines

llvm/lib/Passes/PassBuilderPipelines.cpp

	Show All 31 Lines
	#include "llvm/Support/VirtualFileSystem.h"			#include "llvm/Support/VirtualFileSystem.h"
	#include "llvm/Target/TargetMachine.h"			#include "llvm/Target/TargetMachine.h"
	#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"			#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
	#include "llvm/Transforms/Coroutines/CoroCleanup.h"			#include "llvm/Transforms/Coroutines/CoroCleanup.h"
	#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"			#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
	#include "llvm/Transforms/Coroutines/CoroEarly.h"			#include "llvm/Transforms/Coroutines/CoroEarly.h"
	#include "llvm/Transforms/Coroutines/CoroElide.h"			#include "llvm/Transforms/Coroutines/CoroElide.h"
	#include "llvm/Transforms/Coroutines/CoroSplit.h"			#include "llvm/Transforms/Coroutines/CoroSplit.h"
				#include "llvm/Transforms/HipStdPar/HipStdPar.h"
	#include "llvm/Transforms/IPO/AlwaysInliner.h"			#include "llvm/Transforms/IPO/AlwaysInliner.h"
	#include "llvm/Transforms/IPO/Annotation2Metadata.h"			#include "llvm/Transforms/IPO/Annotation2Metadata.h"
	#include "llvm/Transforms/IPO/ArgumentPromotion.h"			#include "llvm/Transforms/IPO/ArgumentPromotion.h"
	#include "llvm/Transforms/IPO/Attributor.h"			#include "llvm/Transforms/IPO/Attributor.h"
	#include "llvm/Transforms/IPO/CalledValuePropagation.h"			#include "llvm/Transforms/IPO/CalledValuePropagation.h"
	#include "llvm/Transforms/IPO/ConstantMerge.h"			#include "llvm/Transforms/IPO/ConstantMerge.h"
	#include "llvm/Transforms/IPO/CrossDSOCFI.h"			#include "llvm/Transforms/IPO/CrossDSOCFI.h"
	#include "llvm/Transforms/IPO/DeadArgumentElimination.h"			#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
	▲ Show 20 Lines • Show All 2,045 Lines • Show Last 20 Lines

llvm/lib/Passes/PassRegistry.def

	Show First 20 Lines • Show All 58 Lines • ▼ Show 20 Lines
	MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())			MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass())
	MODULE_PASS("dxil-upgrade", DXILUpgradePass())			MODULE_PASS("dxil-upgrade", DXILUpgradePass())
	MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())			MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
	MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))			MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
	MODULE_PASS("forceattrs", ForceFunctionAttrsPass())			MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
	MODULE_PASS("function-import", FunctionImportPass())			MODULE_PASS("function-import", FunctionImportPass())
	MODULE_PASS("globalopt", GlobalOptPass())			MODULE_PASS("globalopt", GlobalOptPass())
	MODULE_PASS("globalsplit", GlobalSplitPass())			MODULE_PASS("globalsplit", GlobalSplitPass())
				MODULE_PASS("hipstdpar-select-accelerator-code",
				HipStdParAcceleratorCodeSelectionPass())
				MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
	MODULE_PASS("hotcoldsplit", HotColdSplittingPass())			MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
	MODULE_PASS("inferattrs", InferFunctionAttrsPass())			MODULE_PASS("inferattrs", InferFunctionAttrsPass())
	MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())			MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
	MODULE_PASS("inliner-ml-advisor-release", ModuleInlinerWrapperPass(getInlineParams(), true, {}, InliningAdvisorMode::Release, 0))			MODULE_PASS("inliner-ml-advisor-release", ModuleInlinerWrapperPass(getInlineParams(), true, {}, InliningAdvisorMode::Release, 0))
	MODULE_PASS("print<inline-advisor>", InlineAdvisorAnalysisPrinterPass(dbgs()))			MODULE_PASS("print<inline-advisor>", InlineAdvisorAnalysisPrinterPass(dbgs()))
	MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(			MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(
	getInlineParams(),			getInlineParams(),
	false))			false))
	▲ Show 20 Lines • Show All 590 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Show First 20 Lines • Show All 44 Lines • ▼ Show 20 Lines
#include "llvm/CodeGen/RegAllocRegistry.h"		#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"		#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"		#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"		#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"		#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"		#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"		#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"		#include "llvm/Passes/PassBuilder.h"
		#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"		#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"		#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"		#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"		#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"		#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"		#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"		#include "llvm/Transforms/Utils.h"
▲ Show 20 Lines • Show All 282 Lines • ▼ Show 20 Lines	static cl::opt<bool> EnableMaxIlpSchedStrategy(
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),		cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
cl::Hidden, cl::init(false));		cl::Hidden, cl::init(false));

static cl::opt<bool> EnableRewritePartialRegUses(		static cl::opt<bool> EnableRewritePartialRegUses(
"amdgpu-enable-rewrite-partial-reg-uses",		"amdgpu-enable-rewrite-partial-reg-uses",
cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),		cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
cl::Hidden);		cl::Hidden);

		static cl::opt<bool> EnableHipStdPar(
		"amdgpu-enable-hipstdpar",
		cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false),
		cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {		extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target		// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());		RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());		RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

PassRegistry *PR = PassRegistry::getPassRegistry();		PassRegistry *PR = PassRegistry::getPassRegistry();
initializeR600ClauseMergePassPass(*PR);		initializeR600ClauseMergePassPass(*PR);
initializeR600ControlFlowFinalizerPass(*PR);		initializeR600ControlFlowFinalizerPass(*PR);
▲ Show 20 Lines • Show All 335 Lines • ▼ Show 20 Lines	void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {

PB.registerPipelineStartEPCallback(		PB.registerPipelineStartEPCallback(
[](ModulePassManager &PM, OptimizationLevel Level) {		[](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;		FunctionPassManager FPM;
FPM.addPass(AMDGPUUseNativeCallsPass());		FPM.addPass(AMDGPUUseNativeCallsPass());
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)		if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
FPM.addPass(AMDGPUSimplifyLibCallsPass());		FPM.addPass(AMDGPUSimplifyLibCallsPass());
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));		PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
		if (EnableHipStdPar)
		PM.addPass(HipStdParAcceleratorCodeSelectionPass());
});		});

PB.registerPipelineEarlySimplificationEPCallback(		PB.registerPipelineEarlySimplificationEPCallback(
[](ModulePassManager &PM, OptimizationLevel Level) {		[](ModulePassManager &PM, OptimizationLevel Level) {
PM.addPass(AMDGPUPrintfRuntimeBindingPass());		PM.addPass(AMDGPUPrintfRuntimeBindingPass());

if (Level == OptimizationLevel::O0)		if (Level == OptimizationLevel::O0)
return;		return;
▲ Show 20 Lines • Show All 949 Lines • Show Last 20 Lines

llvm/lib/Target/AMDGPU/CMakeLists.txt

Show First 20 Lines • Show All 170 Lines • ▼ Show 20 Lines	add_llvm_target(AMDGPUCodeGen
AMDGPUUtils		AMDGPUUtils
Analysis		Analysis
AsmPrinter		AsmPrinter
BinaryFormat		BinaryFormat
CodeGen		CodeGen
CodeGenTypes		CodeGenTypes
Core		Core
GlobalISel		GlobalISel
		HipStdPar
IPO		IPO
MC		MC
MIRParser		MIRParser
Passes		Passes
Scalar		Scalar
SelectionDAG		SelectionDAG
Support		Support
Target		Target
Show All 14 Lines

llvm/lib/Transforms/CMakeLists.txt

	add_subdirectory(Utils)			add_subdirectory(Utils)
	add_subdirectory(Instrumentation)			add_subdirectory(Instrumentation)
	add_subdirectory(AggressiveInstCombine)			add_subdirectory(AggressiveInstCombine)
	add_subdirectory(InstCombine)			add_subdirectory(InstCombine)
	add_subdirectory(Scalar)			add_subdirectory(Scalar)
	add_subdirectory(IPO)			add_subdirectory(IPO)
	add_subdirectory(Vectorize)			add_subdirectory(Vectorize)
	add_subdirectory(Hello)			add_subdirectory(Hello)
	add_subdirectory(ObjCARC)			add_subdirectory(ObjCARC)
	add_subdirectory(Coroutines)			add_subdirectory(Coroutines)
	add_subdirectory(CFGuard)			add_subdirectory(CFGuard)
				add_subdirectory(HipStdPar)

llvm/lib/Transforms/HipStdPar/CMakeLists.txt

This file was added.

				# Component library holding the HIP Standard Parallelism passes. It is built
				# from HipStdPar.cpp and exposed to other targets under the component name
				# "HipStdPar" (the name listed in their LINK_COMPONENTS).
				add_llvm_component_library(LLVMHipStdPar
				HipStdPar.cpp

				ADDITIONAL_HEADER_DIRS
				${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/HipStdPar

				DEPENDS
				intrinsics_gen
				LLVMAnalysis

				COMPONENT_NAME
				HipStdPar

				LINK_COMPONENTS
				Analysis
				Core
				Support
				TransformUtils)

llvm/lib/Transforms/HipStdPar/HipStdPar.cpp

This file was added.

				//===----- HipStdPar.cpp - HIP C++ Standard Parallelism Support Passes ----===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				// This file implements two passes that enable HIP C++ Standard Parallelism
				// Support:
				//
				// 1. AcceleratorCodeSelection (required): Given that only algorithms are
				// accelerated, and that the accelerated implementation exists in the form of
				// a compute kernel, we assume that only the kernel, and all functions
				// reachable from it, constitute code that the user expects the accelerator
				// to execute. Thus, we identify the set of all functions reachable from
				// kernels, and then remove all unreachable ones. This last part is necessary
				// because it is possible for code that the user did not expect to execute on
				// an accelerator to contain constructs that cannot be handled by the target
				// BE, which cannot be provably demonstrated to be dead code in general, and
				// thus can lead to mis-compilation. The degenerate case of this is when a
				// Module contains no kernels (the parent TU had no algorithm invocations fit
				// for acceleration), which we handle by completely emptying said module.
				// NOTE: The above does not handle indirectly reachable functions i.e.
				// it is possible to obtain a case where the target of an indirect
				// call is otherwise unreachable and thus is removed; this
				// restriction is aligned with the current `-hipstdpar` limitations
				// and will be relaxed in the future.
				//
				// 2. AllocationInterposition (required only when on-demand paging is
				// unsupported): Some accelerators or operating systems might not support
				// transparent on-demand paging. Thus, they would only be able to access
				// memory that is allocated by an accelerator-aware mechanism. For such cases
				// the user can opt into enabling allocation / deallocation interposition,
				// whereby we replace calls to known allocation / deallocation functions with
				// calls to runtime implemented equivalents that forward the requests to
				// accelerator-aware interfaces. We also support freeing system allocated
				// memory that ends up in one of the runtime equivalents, since this can
				// happen if e.g. a library that was compiled without interposition returns
				// an allocation that can be validly passed to `free`.
				//===----------------------------------------------------------------------===//

				#include "llvm/Transforms/HipStdPar/HipStdPar.h"

				#include "llvm/ADT/SmallPtrSet.h"
				#include "llvm/ADT/SmallVector.h"
				#include "llvm/ADT/STLExtras.h"
				#include "llvm/Analysis/CallGraph.h"
				#include "llvm/Analysis/OptimizationRemarkEmitter.h"
				#include "llvm/IR/Constants.h"
				#include "llvm/IR/DebugInfoMetadata.h"
				#include "llvm/IR/Function.h"
				#include "llvm/IR/Module.h"
				#include "llvm/Transforms/Utils/ModuleUtils.h"

				#include <cassert>
				#include <string>
				#include <utility>

				using namespace llvm;

				/// Removes \p ToErase from its parent: every remaining use is first redirected
				/// to a poison value of the same type, then the entity itself is erased.
				/// \p ToErase must not be touched after this call.
				template<typename T>
				static inline void eraseFromModule(T &ToErase) {
				// Redirect outstanding uses to poison so that nothing still refers to the
				// entity when it is erased below.
				ToErase.replaceAllUsesWith(PoisonValue::get(ToErase.getType()));
				yaxunlUnsubmitted Done Reply Inline Actions variable names should be capitalized https://llvm.org/docs/CodingStandards.html#name-types-functions-variables-and-enumerators-properly yaxunl: variable names should be capitalized https://llvm.org/docs/CodingStandards.html#name-types…
				AlexVlxAuthorUnsubmitted Done Reply Inline Actions Will fix, thanks. AlexVlx: Will fix, thanks.
				ToErase.eraseFromParent();
				}

				/// Checks whether the global variable \p G can be kept in accelerator code.
				///
				/// Returns true when \p G is not thread-local, or when, after its droppable
				/// uses have been dropped, isConstantUsed() reports no use. Otherwise a
				/// DS_Error "unsupported" diagnostic naming the variable is emitted, attached
				/// to the first Instruction found among its transitive users, and false is
				/// returned.
				///
				/// Note that this mutates \p G: droppable uses are removed as a side effect.
				static inline bool checkIfSupported(GlobalVariable &G) {
				if (!G.isThreadLocal())
				return true;

				G.dropDroppableUses();

				// isConstantUsed() reads as "is this Constant used?", not "is it used only
				// by constants": it is true when a non-constant use exists, so the early
				// out is taken only when nothing but constant uses remain.
				if (!G.isConstantUsed())
				return true;
				yaxunlUnsubmitted Not Done Reply Inline Actions Is this condition correct? AMDGPU backend supports non-constant TLS? yaxunl: Is this condition correct? AMDGPU backend supports non-constant TLS?
				AlexVlxAuthorUnsubmitted Done Reply Inline Actions Typo, will fix, good catch! AlexVlx: Typo, will fix, good catch!
				AlexVlxAuthorUnsubmitted Done Reply Inline Actions Actually, scratch that, I misremembered -> the predicate should read as `Is this Constant Used?` and not `Is only ConstantUsed` i.e. it returns true iff there are non-constant uses, so we only early out iff there are ONLY constexpr uses. So yes, the condition is correct, but the predicate is confusing, a bit:) AlexVlx: Actually, scratch that, I misremembered -> the predicate should read as `Is this Constant Used?

				// Build the diagnostic text up front; OS writes into W.
				std::string W;
				raw_string_ostream OS(W);

				OS << "Accelerator does not support the thread_local variable "
				<< G.getName();

				// Worklist walk over the users of G, looking for the first Instruction so
				// that the diagnostic can point at a function and a debug location.
				Instruction *I = nullptr;
				SmallVector<User *> Tmp(G.user_begin(), G.user_end());
				SmallPtrSet<User *, 5> Visited;
				do {
				auto U = std::move(Tmp.back());
				Tmp.pop_back();

				// Inside a do-while, `continue` jumps to the loop condition, so an
				// already visited user is simply skipped.
				if (Visited.contains(U))
				continue;

				// A non-instruction user is expanded into its own users, which are
				// examined on later iterations.
				if (isa<Instruction>(U))
				I = cast<Instruction>(U);
				else
				Tmp.insert(Tmp.end(), U->user_begin(), U->user_end());

				Visited.insert(U);
				} while (!I && !Tmp.empty());

				assert(I && "thread_local global should have at least one non-constant use.");

				// Report the error against the function that contains the instruction.
				G.getContext().diagnose(
				DiagnosticInfoUnsupported(*I->getParent()->getParent(), W,
				I->getDebugLoc(), DS_Error));

				return false;
				}

				/// Empties \p M by erasing every function, global variable, alias and ifunc,
				/// in that order. Each entity goes through eraseFromModule, so its remaining
				/// uses are replaced with poison before it is destroyed. The head of each list
				/// is fetched anew on every iteration because the previous head has just been
				/// erased.
				static inline void clearModule(Module &M) { // TODO: simplify.
				  while (!M.empty())
				    eraseFromModule(*M.begin());

				  while (!M.global_empty())
				    eraseFromModule(*M.global_begin());

				  while (!M.alias_empty())
				    eraseFromModule(*M.alias_begin());

				  while (!M.ifunc_empty())
				    eraseFromModule(*M.ifunc_begin());
				}

				/// Post-processes the global variables of \p M.
				///
				/// If any global is rejected by checkIfSupported the whole module is cleared
				/// and processing stops. Otherwise every global that is not thread-local, not
				/// constant, lives in the default globals address space and has external
				/// linkage is switched to external weak linkage and marked as externally
				/// initialized.
				static inline void maybeHandleGlobals(Module &M) {
				unsigned GlobAS = M.getDataLayout().getDefaultGlobalsAddressSpace();
				for (auto &&G : M.globals()) { // TODO: should we handle these in the FE?
				// An unsupported global is an error (already diagnosed); drop everything.
				// Returning right away means the loop never continues over erased globals.
				if (!checkIfSupported(G))
				return clearModule(M);

				// Only mutable, externally linked globals in the default globals address
				// space are adjusted; everything else is left untouched.
				if (G.isThreadLocal())
				continue;
				if (G.isConstant())
				continue;
				if (G.getAddressSpace() != GlobAS)
				continue;
				if (G.getLinkage() != GlobalVariable::ExternalLinkage)
				continue;

				// Globals are not bound by the runtime yet; weak linkage avoids having to
				// define (bind) them when the code object is loaded, which would otherwise
				// be a load time error for declarations.
				G.setLinkage(GlobalVariable::ExternalWeakLinkage);
				G.setExternallyInitialized(true);
				yaxunlUnsubmitted Not Done Reply Inline Actions Any reason to change the linkage to weak? yaxunl: Any reason to change the linkage to weak?
				AlexVlxAuthorUnsubmitted Not Done Reply Inline Actions Yes, this is a precursor to adding actual support for globals, which will entail binding them. Since at the moment they are not handled (will require a RT addition), we're giving them weak linkage toe eschew the need to define (bind) them at code object load time, otherwise for declarations this'd lead to a load time error. The other option would be to turn all declarations into definitions, with the initialiser being poison (but then this'd need to be undone when we add support). AlexVlx: Yes, this is a precursor to adding actual support for globals, which will entail binding them.
				}
				}

				template<unsigned N>
				static inline void removeUnreachableFunctions(
				const SmallPtrSet<const Function *, N>& Reachable, Module &M) {
				removeFromUsedLists(M, [&](Constant *C) {
				if (auto F = dyn_cast<Function>(C))
				return !Reachable.contains(F);

				return false;
				});

				SmallVector<std::reference_wrapper<Function>> ToRemove;
				copy_if(M, std::back_inserter(ToRemove), [&](auto &&F) {
				return !F.isIntrinsic() && !Reachable.contains(&F);
				});

				for_each(ToRemove, eraseFromModule<Function>);
				}

				/// Returns true iff \p F is non-null and uses the AMDGPU_KERNEL calling
				/// convention, i.e. it is a kernel from which reachability is computed.
				static inline bool isAcceleratorExecutionRoot(const Function *F) {
				  return F && F->getCallingConv() == CallingConv::AMDGPU_KERNEL;
				}

				static inline bool checkIfSupported(const Function F, const CallBase CB) {
				const auto Dx = F->getName().rfind("__hipstdpar_unsupported");

				if (Dx == StringRef::npos)
				return true;

				const auto N = F->getName().substr(0, Dx);

				std::string W;
				raw_string_ostream OS(W);

				if (N == "__ASM")
				OS << "Accelerator does not support the ASM block:\n"
				<< cast<ConstantDataArray>(CB->getArgOperand(0))->getAsCString();
				else
				OS << "Accelerator does not support the " << N << " function.";

				auto Caller = CB->getParent()->getParent();

				Caller->getContext().diagnose(
				DiagnosticInfoUnsupported(*Caller, W, CB->getDebugLoc(), DS_Error));

				return false;
				}

				/// Computes the set of functions reachable from the accelerator execution
				/// roots of \p M through the call graph and removes everything else.
				///
				/// If a reachable callee is rejected by checkIfSupported the pass returns
				/// immediately, leaving the module as it is (the diagnostic has already been
				/// emitted). If the module has no execution root it is cleared completely.
				/// Globals are post-processed by maybeHandleGlobals in both remaining cases.
				/// No analyses are preserved.
				PreservedAnalyses
				HipStdParAcceleratorCodeSelectionPass::run(Module &M,
				                                           ModuleAnalysisManager &MAM) {
				  auto &CG = MAM.getResult<CallGraphAnalysis>(M);

				  SmallPtrSet<const Function *, 32> Reachable;
				  for (auto &&Entry : CG) {
				    const Function *Root = Entry.first;
				    if (!isAcceleratorExecutionRoot(Root))
				      continue;

				    Reachable.insert(Root);

				    // Worklist traversal of the call graph starting at this root.
				    SmallVector<const Function *> Worklist({Root});
				    while (!Worklist.empty()) {
				      const Function *Caller = Worklist.pop_back_val();

				      for (auto &&Edge : *CG[Caller]) {
				        CallGraphNode *CalleeNode = Edge.second;
				        if (!CalleeNode)
				          continue;

				        Function *Callee = CalleeNode->getFunction();
				        if (!Callee)
				          continue;
				        if (Reachable.contains(Callee))
				          continue;

				        // NOTE(review): Edge.first is dereferenced unconditionally, which
				        // assumes every edge reaching a function node carries a call site;
				        // confirm this also holds for edges without one.
				        if (!checkIfSupported(Callee, dyn_cast<CallBase>(*Edge.first)))
				          return PreservedAnalyses::none();

				        Reachable.insert(Callee);
				        Worklist.push_back(Callee);
				      }
				    }
				  }

				  if (Reachable.empty())
				    clearModule(M);
				  else
				    removeUnreachableFunctions(Reachable, M);

				  maybeHandleGlobals(M);

				  return PreservedAnalyses::none();
				}

				/// Table of (function name, replacement name) pairs used by the allocation
				/// interposition pass. The first element is the name of an allocation or
				/// deallocation function as it appears in the module; the second is the name
				/// of the __hipstdpar_ function its uses are redirected to. Several entries
				/// share a replacement: for example, the scalar and array operator new /
				/// delete forms, and the plain, __builtin_ and __libc_ spellings of the C
				/// functions.
				static constexpr std::pair<StringLiteral, StringLiteral> ReplaceMap[]{
				{"aligned_alloc", "__hipstdpar_aligned_alloc"},
				{"calloc", "__hipstdpar_calloc"},
				{"free", "__hipstdpar_free"},
				{"malloc", "__hipstdpar_malloc"},
				{"memalign", "__hipstdpar_aligned_alloc"},
				{"posix_memalign", "__hipstdpar_posix_aligned_alloc"},
				{"realloc", "__hipstdpar_realloc"},
				{"reallocarray", "__hipstdpar_realloc_array"},
				{"_ZdaPv", "__hipstdpar_operator_delete"},
				{"_ZdaPvm", "__hipstdpar_operator_delete_sized"},
				{"_ZdaPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
				{"_ZdaPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
				{"_ZdlPv", "__hipstdpar_operator_delete"},
				{"_ZdlPvm", "__hipstdpar_operator_delete_sized"},
				{"_ZdlPvSt11align_val_t", "__hipstdpar_operator_delete_aligned"},
				{"_ZdlPvmSt11align_val_t", "__hipstdpar_operator_delete_aligned_sized"},
				{"_Znam", "__hipstdpar_operator_new"},
				{"_ZnamRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
				{"_ZnamSt11align_val_t", "__hipstdpar_operator_new_aligned"},
				{"_ZnamSt11align_val_tRKSt9nothrow_t",
				"__hipstdpar_operator_new_aligned_nothrow"},

				{"_Znwm", "__hipstdpar_operator_new"},
				{"_ZnwmRKSt9nothrow_t", "__hipstdpar_operator_new_nothrow"},
				{"_ZnwmSt11align_val_t", "__hipstdpar_operator_new_aligned"},
				{"_ZnwmSt11align_val_tRKSt9nothrow_t",
				"__hipstdpar_operator_new_aligned_nothrow"},
				{"__builtin_calloc", "__hipstdpar_calloc"},
				{"__builtin_free", "__hipstdpar_free"},
				{"__builtin_malloc", "__hipstdpar_malloc"},
				{"__builtin_operator_delete", "__hipstdpar_operator_delete"},
				{"__builtin_operator_new", "__hipstdpar_operator_new"},
				{"__builtin_realloc", "__hipstdpar_realloc"},
				{"__libc_calloc", "__hipstdpar_calloc"},
				{"__libc_free", "__hipstdpar_free"},
				{"__libc_malloc", "__hipstdpar_malloc"},
				{"__libc_memalign", "__hipstdpar_aligned_alloc"},
				{"__libc_realloc", "__hipstdpar_realloc"}
				};

				/// Redirects uses of known allocation / deallocation functions in \p M to
				/// their __hipstdpar_ replacements, as listed in ReplaceMap.
				///
				/// For each named function found in the table, all of its uses are replaced
				/// with the replacement function if the module contains one; otherwise a
				/// DS_Warning "unsupported" diagnostic is emitted and the function is left
				/// alone. Afterwards, if the module contains __hipstdpar_hidden_free, its uses
				/// are redirected to __libc_free (declared on demand with the same type and
				/// attributes) and it is erased. No analyses are preserved.
				PreservedAnalyses
				HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
				  SmallDenseMap<StringRef, StringRef> AllocReplacements(std::cbegin(ReplaceMap),
				                                                        std::cend(ReplaceMap));

				  for (auto &&F : M) {
				    if (!F.hasName())
				      continue;

				    const auto Entry = AllocReplacements.find(F.getName());
				    if (Entry == AllocReplacements.end())
				      continue;

				    const StringRef ReplacementName = Entry->second;
				    if (auto Replacement = M.getFunction(ReplacementName)) {
				      F.replaceAllUsesWith(Replacement);
				      continue;
				    }

				    // The replacement is not available in this module: warn and keep the
				    // original function untouched.
				    std::string W;
				    raw_string_ostream OS(W);

				    OS << "cannot be interposed, missing: " << ReplacementName
				       << ". Tried to run the allocation interposition pass without the "
				       << "replacement functions available.";

				    F.getContext().diagnose(DiagnosticInfoUnsupported(F, W,
				                                                      F.getSubprogram(),
				                                                      DS_Warning));
				  }

				  // This runs after the loop above, so the __libc_free uses introduced here
				  // are not themselves interposed.
				  if (auto HiddenFree = M.getFunction("__hipstdpar_hidden_free")) {
				    auto LibcFree = M.getOrInsertFunction("__libc_free",
				                                          HiddenFree->getFunctionType(),
				                                          HiddenFree->getAttributes());
				    HiddenFree->replaceAllUsesWith(LibcFree.getCallee());

				    eraseFromModule(*HiddenFree);
				  }

				  return PreservedAnalyses::none();
				}

llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll

This file was added.

				; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
				; RUN: %s | FileCheck %s

				$_ZNK8CallableclEPi = comdat any
				$_ZNK8CallableclEPf = comdat any
				$_ZNK8Callable6mem_fnEPKi = comdat any
				$_ZN8Callable13static_mem_fnEPKi = comdat any
				; CHECK-NOT: $_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf
				$_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf = comdat any
				; CHECK-NOT: $_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf
				$_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf = comdat any

				%struct.Callable = type { [64 x i8] }

				; CHECK-NOT: @should_be_removed
				@llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr @should_be_removed], section "llvm.metadata"

				define void @should_be_removed(ptr %p) {
				ret void
				}

				declare void @llvm.trap()

				; CHECK: define {{.*}} @called_via_chain
				define void @called_via_chain(ptr %p) {
				entry:
				%tobool.not = icmp eq ptr %p, null
				br i1 %tobool.not, label %if.then, label %if.end

				if.then:
				tail call void @llvm.trap()
				unreachable

				if.end:
				ret void
				}

				; CHECK: define {{.*}} @directly_called
				define void @directly_called(ptr %p) {
				tail call void @called_via_chain(ptr %p)
				ret void
				}

				; CHECK: define {{.*}} amdgpu_kernel {{.*}} @accelerator_execution_root
				define hidden amdgpu_kernel void @accelerator_execution_root(ptr %p) {
				tail call void @directly_called(ptr %p)
				ret void
				}

				; CHECK-NOT: @defined_elsewhere_should_be_removed
				declare void @defined_elsewhere_should_be_removed(ptr)

				; CHECK: declare {{.*}} @defined_elsewhere_directly_called
				declare void @defined_elsewhere_directly_called(ptr)

				; CHECK: define {{.*}} amdgpu_kernel {{.*}} @another_accelerator_execution_root
				define hidden amdgpu_kernel void @another_accelerator_execution_root(ptr %p) {
				tail call void @defined_elsewhere_directly_called(ptr %p)
				ret void
				}

				; Also test passing a callable object (functor / lambda) to a kernel, which is
				; the common pattern for customising algorithms.

				; CHECK: define {{.*}} amdgpu_kernel {{.*}} @_Z22accelerator_execution_root_taking_callablePi8Callable
				define hidden amdgpu_kernel void @_Z22accelerator_execution_root_taking_callablePi8Callable(ptr noundef %p, ptr addrspace(4) nocapture readonly byref(%struct.Callable) align 8 %callable) {
				%callable_in_generic = addrspacecast ptr addrspace(4) %callable to ptr
				call void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %callable_in_generic, ptr noundef %p)

				ret void
				}

				; CHECK: define {{.*}} @_ZNK8CallableclEPi
				define linkonce_odr dso_local void @_ZNK8CallableclEPi(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) {
				call void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p)

				ret void
				}

				; CHECK: define {{.*}} @_ZNK8Callable6mem_fnEPKi
				define linkonce_odr dso_local void @_ZNK8Callable6mem_fnEPKi(ptr noundef nonnull align 1 dereferenceable(1) %this, ptr noundef %p) {
				call void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p)

				ret void
				}

				; CHECK: define {{.*}} @_ZN8Callable13static_mem_fnEPKi
				define linkonce_odr dso_local void @_ZN8Callable13static_mem_fnEPKi(ptr noundef %p) {
				ret void
				}

				; CHECK-NOT: define {{.*}} @_Z26non_kernel_taking_callablePf8Callable
				define dso_local void @_Z26non_kernel_taking_callablePf8Callable(ptr noundef %p, ptr noundef byval(%struct.Callable) align 8 %callable) {
				call void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %callable, ptr noundef %p)

				ret void
				}

				; CHECK-NOT: define {{.*}} @_ZNK8CallableclEPf
				define linkonce_odr dso_local void @_ZNK8CallableclEPf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) {
				call void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p)

				ret void
				}

				; CHECK-NOT: @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf
				define linkonce_odr dso_local void @_ZNK8Callable37another_mem_fn_which_will_get_removedEPKf(ptr noundef nonnull align 1 dereferenceable(64) %this, ptr noundef %p) {
				call void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p)

				ret void
				}

				; CHECK-NOT: @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf
				define linkonce_odr dso_local void @_ZN8Callable44another_static_mem_fn_which_will_get_removedEPKf(ptr noundef %p) {
				ret void
				}
				No newline at end of file

llvm/test/Transforms/HipStdPar/allocation-interposition.ll

This file was added.

				; RUN: opt -S -passes=hipstdpar-interpose-alloc %s | FileCheck %s

				%"struct.std::nothrow_t" = type { i8 }

				@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1

				declare ptr @__hipstdpar_aligned_alloc(i64, i64)

				declare ptr @__hipstdpar_malloc(i64)

				declare ptr @__hipstdpar_calloc(i64, i64)

				declare i32 @__hipstdpar_posix_aligned_alloc(ptr, i64, i64)

				declare void @__hipstdpar_hidden_free(ptr)

				declare ptr @__hipstdpar_realloc(ptr, i64)

				declare ptr @__hipstdpar_realloc_array(ptr, i64, i64)

				declare void @__hipstdpar_free(ptr)

				declare ptr @__hipstdpar_operator_new_aligned(i64, i64)

				declare ptr @__hipstdpar_operator_new(i64)

				declare ptr @__hipstdpar_operator_new_nothrow(i64, %"struct.std::nothrow_t")

				declare ptr @__hipstdpar_operator_new_aligned_nothrow(i64, i64, %"struct.std::nothrow_t")

				declare void @__hipstdpar_operator_delete_aligned_sized(ptr, i64, i64)

				declare void @__hipstdpar_operator_delete(ptr)

				declare void @__hipstdpar_operator_delete_aligned(ptr, i64)

				declare void @__hipstdpar_operator_delete_sized(ptr, i64)

				; Exercises the allocation interposition pass when every __hipstdpar_*
				; replacement is declared in the module. Each call to a standard allocation or
				; deallocation function is preceded by a check line that expects the same call
				; (same value name, attributes and arguments) with only the callee swapped for
				; the matching __hipstdpar_* function. The function always returns 0.
				define dso_local noundef i32 @allocs() {
				; C allocation family paired with free. Note that memalign is expected to
				; map to the same replacement as aligned_alloc.
				; CHECK: %1 = call noalias align 8 ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42)
				%1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %1)
				call void @free(ptr noundef %1)

				; CHECK: %2 = call noalias ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
				%2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %2)
				call void @free(ptr noundef %2)

				; CHECK: %3 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42)
				%3 = call noalias ptr @malloc(i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %3)
				call void @free(ptr noundef %3)

				; CHECK: %4 = call noalias align 8 ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42)
				%4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %4)
				call void @free(ptr noundef %4)

				; posix_memalign writes its result through the %tmp stack slot.
				; NOTE(review): free is handed %tmp itself (the alloca), not a pointer loaded
				; from it; only the callee replacement is being checked here.
				%tmp = alloca ptr, align 8
				; CHECK: %5 = call i32 @__hipstdpar_posix_aligned_alloc(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
				%5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %tmp)
				call void @free(ptr noundef %tmp)

				; realloc / reallocarray applied to a freshly allocated pointer.
				; CHECK: %6 = call noalias ptr @__hipstdpar_malloc(i64 noundef 42)
				%6 = call noalias ptr @malloc(i64 noundef 42)
				; CHECK: %7 = call ptr @__hipstdpar_realloc(ptr noundef %6, i64 noundef 42)
				%7 = call ptr @realloc(ptr noundef %6, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %7)
				call void @free(ptr noundef %7)

				; CHECK: %8 = call noalias ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
				%8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
				; CHECK: %9 = call ptr @__hipstdpar_realloc_array(ptr noundef %8, i64 noundef 1, i64 noundef 42)
				%9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %9)
				call void @free(ptr noundef %9)

				; Scalar operator new / operator delete (_Znw* / _Zdl*): plain, aligned,
				; nothrow and aligned nothrow forms.
				; CHECK: %10 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 1)
				%10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1)
				; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %10)
				call void @_ZdlPv(ptr noundef %10)

				; CHECK: %11 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 1, i64 noundef 8)
				%11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8)
				; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %11, i64 noundef 8)
				call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8)

				; CHECK: %12 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				%12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %12)
				call void @_ZdlPv(ptr noundef %12)

				; CHECK: %13 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				%13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %13, i64 noundef 8)
				call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8)

				; Array operator new[] / operator delete[] (_Zna* / _Zda*). These are
				; expected to map to the same replacements as the scalar forms above.
				; CHECK: %14 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 42)
				%14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42)
				; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %14)
				call void @_ZdaPv(ptr noundef %14)

				; CHECK: %15 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8)
				%15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8)
				; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %15, i64 noundef 8)
				call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8)

				; CHECK: %16 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				%16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %16)
				call void @_ZdaPv(ptr noundef %16)

				; CHECK: %17 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				%17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %17, i64 noundef 8)
				call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8)

				; Second pass over calloc / malloc / operator new / realloc, this time with
				; no noalias return attribute on the C allocation calls and size 42 for the
				; scalar operator new forms.
				; CHECK: %18 = call ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
				%18 = call ptr @calloc(i64 noundef 1, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %18)
				call void @free(ptr noundef %18)

				; CHECK: %19 = call ptr @__hipstdpar_malloc(i64 noundef 42)
				%19 = call ptr @malloc(i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %19)
				call void @free(ptr noundef %19)

				; CHECK: %20 = call noalias noundef nonnull ptr @__hipstdpar_operator_new(i64 noundef 42)
				%20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42)
				; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %20)
				call void @_ZdlPv(ptr noundef %20)

				; CHECK: %21 = call noalias noundef nonnull align 8 ptr @__hipstdpar_operator_new_aligned(i64 noundef 42, i64 noundef 8)
				%21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8)
				; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %21, i64 noundef 8)
				call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8)

				; CHECK: %22 = call noalias noundef ptr @__hipstdpar_operator_new_nothrow(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				%22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				; CHECK: call void @__hipstdpar_operator_delete(ptr noundef %22)
				call void @_ZdlPv(ptr noundef %22)

				; CHECK: %23 = call noalias noundef align 8 ptr @__hipstdpar_operator_new_aligned_nothrow(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				%23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				; CHECK: call void @__hipstdpar_operator_delete_aligned(ptr noundef %23, i64 noundef 8)
				call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8)

				; CHECK: %24 = call ptr @__hipstdpar_malloc(i64 noundef 42)
				%24 = call ptr @malloc(i64 noundef 42)
				; CHECK: %25 = call ptr @__hipstdpar_realloc(ptr noundef %24, i64 noundef 41)
				%25 = call ptr @realloc(ptr noundef %24, i64 noundef 41)
				; CHECK: call void @__hipstdpar_free(ptr noundef %25)
				call void @free(ptr noundef %25)

				; __libc_* entry points are expected to map to the same replacements as
				; their unprefixed counterparts.
				; CHECK: %26 = call ptr @__hipstdpar_calloc(i64 noundef 1, i64 noundef 42)
				%26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %26)
				call void @__libc_free(ptr noundef %26)

				; CHECK: %27 = call ptr @__hipstdpar_malloc(i64 noundef 42)
				%27 = call ptr @__libc_malloc(i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %27)
				call void @__libc_free(ptr noundef %27)

				; CHECK: %28 = call ptr @__hipstdpar_aligned_alloc(i64 noundef 8, i64 noundef 42)
				%28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42)
				; CHECK: call void @__hipstdpar_free(ptr noundef %28)
				call void @__libc_free(ptr noundef %28)

				ret i32 0
				}

				declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)

				declare void @free(ptr noundef)

				declare noalias ptr @calloc(i64 noundef, i64 noundef)

				declare noalias ptr @malloc(i64 noundef)

				declare noalias ptr @memalign(i64 noundef, i64 noundef)

				declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef)

				declare ptr @realloc(ptr noundef, i64 noundef)

				declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef)

				declare noundef nonnull ptr @_Znwm(i64 noundef)

				declare void @_ZdlPv(ptr noundef)

				declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef)

				declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef)

				declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare noundef nonnull ptr @_Znam(i64 noundef)

				declare void @_ZdaPv(ptr noundef)

				declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef)

				declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef)

				declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare ptr @__libc_calloc(i64 noundef, i64 noundef)

				declare void @__libc_free(ptr noundef)

				declare ptr @__libc_malloc(i64 noundef)

				declare ptr @__libc_memalign(i64 noundef, i64 noundef)
				No newline at end of file

llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll

This file was added.

				; RUN: opt < %s -passes=hipstdpar-interpose-alloc -S 2>&1 | FileCheck %s

				; CHECK: warning: {{.*}} aligned_alloc {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} free {{.*}} cannot be interposed, missing: __hipstdpar_free. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} calloc {{.*}} cannot be interposed, missing: __hipstdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} memalign {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} posix_memalign {{.*}} cannot be interposed, missing: __hipstdpar_posix_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} realloc {{.*}} cannot be interposed, missing: __hipstdpar_realloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} reallocarray {{.*}} cannot be interposed, missing: __hipstdpar_realloc_array. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _Znwm {{.*}} cannot be interposed, missing: __hipstdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZdlPv {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZnwmSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZdlPvSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZnwmRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZnwmSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _Znam {{.*}} cannot be interposed, missing: __hipstdpar_operator_new. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZdaPv {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZnamSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZdaPvSt11align_val_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_delete_aligned. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZnamRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} _ZnamSt11align_val_tRKSt9nothrow_t {{.*}} cannot be interposed, missing: __hipstdpar_operator_new_aligned_nothrow. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} __libc_calloc {{.*}} cannot be interposed, missing: __hipstdpar_calloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} __libc_free {{.*}} cannot be interposed, missing: __hipstdpar_free. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} __libc_malloc {{.*}} cannot be interposed, missing: __hipstdpar_malloc. Tried to run the allocation interposition pass without the replacement functions available.
				; CHECK: warning: {{.*}} __libc_memalign {{.*}} cannot be interposed, missing: __hipstdpar_aligned_alloc. Tried to run the allocation interposition pass without the replacement functions available.

				%"struct.std::nothrow_t" = type { i8 }

				@_ZSt7nothrow = external global %"struct.std::nothrow_t", align 1

				; Calls every allocation / deallocation entry point that the check lines at
				; the top of this test name, in a module that declares none of the
				; __hipstdpar_* replacements. Those check lines expect one "cannot be
				; interposed" warning per distinct callee. The function always returns 0.
				define dso_local noundef i32 @allocs() {
				; C allocation family paired with free.
				%1 = call noalias align 8 ptr @aligned_alloc(i64 noundef 8, i64 noundef 42)
				call void @free(ptr noundef %1)

				%2 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
				call void @free(ptr noundef %2)

				%3 = call noalias ptr @malloc(i64 noundef 42)
				call void @free(ptr noundef %3)

				%4 = call noalias align 8 ptr @memalign(i64 noundef 8, i64 noundef 42)
				call void @free(ptr noundef %4)

				; NOTE(review): free is handed the %tmp stack slot itself rather than a
				; pointer loaded from it; only the set of callees matters to this test.
				%tmp = alloca ptr, align 8
				%5 = call i32 @posix_memalign(ptr noundef %tmp, i64 noundef 8, i64 noundef 42)
				call void @free(ptr noundef %tmp)

				; realloc / reallocarray applied to a freshly allocated pointer.
				%6 = call noalias ptr @malloc(i64 noundef 42)
				%7 = call ptr @realloc(ptr noundef %6, i64 noundef 42)
				call void @free(ptr noundef %7)

				%8 = call noalias ptr @calloc(i64 noundef 1, i64 noundef 42)
				%9 = call ptr @reallocarray(ptr noundef %8, i64 noundef 1, i64 noundef 42)
				call void @free(ptr noundef %9)

				; Scalar operator new / operator delete (_Znw* / _Zdl*).
				%10 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 1)
				call void @_ZdlPv(ptr noundef %10)

				%11 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 1, i64 noundef 8)
				call void @_ZdlPvSt11align_val_t(ptr noundef %11, i64 noundef 8)

				%12 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 1, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				call void @_ZdlPv(ptr noundef %12)

				%13 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 1, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				call void @_ZdlPvSt11align_val_t(ptr noundef %13, i64 noundef 8)

				; Array operator new[] / operator delete[] (_Zna* / _Zda*).
				%14 = call noalias noundef nonnull ptr @_Znam(i64 noundef 42)
				call void @_ZdaPv(ptr noundef %14)

				%15 = call noalias noundef nonnull align 8 ptr @_ZnamSt11align_val_t(i64 noundef 42, i64 noundef 8)
				call void @_ZdaPvSt11align_val_t(ptr noundef %15, i64 noundef 8)

				%16 = call noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				call void @_ZdaPv(ptr noundef %16)

				%17 = call noalias noundef align 8 ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				call void @_ZdaPvSt11align_val_t(ptr noundef %17, i64 noundef 8)

				; Second pass over calloc / malloc / operator new / realloc with different
				; attributes and sizes.
				%18 = call ptr @calloc(i64 noundef 1, i64 noundef 42)
				call void @free(ptr noundef %18)

				%19 = call ptr @malloc(i64 noundef 42)
				call void @free(ptr noundef %19)

				%20 = call noalias noundef nonnull ptr @_Znwm(i64 noundef 42)
				call void @_ZdlPv(ptr noundef %20)

				%21 = call noalias noundef nonnull align 8 ptr @_ZnwmSt11align_val_t(i64 noundef 42, i64 noundef 8)
				call void @_ZdlPvSt11align_val_t(ptr noundef %21, i64 noundef 8)

				%22 = call noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef 42, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				call void @_ZdlPv(ptr noundef %22)

				%23 = call noalias noundef align 8 ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef 42, i64 noundef 8, ptr noundef nonnull align 1 dereferenceable(1) @_ZSt7nothrow)
				call void @_ZdlPvSt11align_val_t(ptr noundef %23, i64 noundef 8)

				%24 = call ptr @malloc(i64 noundef 42)
				%25 = call ptr @realloc(ptr noundef %24, i64 noundef 41)
				call void @free(ptr noundef %25)

				; __libc_* entry points.
				%26 = call ptr @__libc_calloc(i64 noundef 1, i64 noundef 42)
				call void @__libc_free(ptr noundef %26)

				%27 = call ptr @__libc_malloc(i64 noundef 42)
				call void @__libc_free(ptr noundef %27)

				%28 = call ptr @__libc_memalign(i64 noundef 8, i64 noundef 42)
				call void @__libc_free(ptr noundef %28)

				ret i32 0
				}

				declare noalias ptr @aligned_alloc(i64 noundef, i64 noundef)

				declare void @free(ptr noundef)

				declare noalias ptr @calloc(i64 noundef, i64 noundef)

				declare noalias ptr @malloc(i64 noundef)

				declare noalias ptr @memalign(i64 noundef, i64 noundef)

				declare i32 @posix_memalign(ptr noundef, i64 noundef, i64 noundef)

				declare ptr @realloc(ptr noundef, i64 noundef)

				declare ptr @reallocarray(ptr noundef, i64 noundef, i64 noundef)

				declare noundef nonnull ptr @_Znwm(i64 noundef)

				declare void @_ZdlPv(ptr noundef)

				declare noalias noundef nonnull ptr @_ZnwmSt11align_val_t(i64 noundef, i64 noundef)

				declare void @_ZdlPvSt11align_val_t(ptr noundef, i64 noundef)

				declare noalias noundef ptr @_ZnwmRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare noalias noundef ptr @_ZnwmSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare noundef nonnull ptr @_Znam(i64 noundef)

				declare void @_ZdaPv(ptr noundef)

				declare noalias noundef nonnull ptr @_ZnamSt11align_val_t(i64 noundef, i64 noundef)

				declare void @_ZdaPvSt11align_val_t(ptr noundef, i64 noundef)

				declare noalias noundef ptr @_ZnamRKSt9nothrow_t(i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare noalias noundef ptr @_ZnamSt11align_val_tRKSt9nothrow_t(i64 noundef, i64 noundef, ptr noundef nonnull align 1 dereferenceable(1))

				declare ptr @__libc_calloc(i64 noundef, i64 noundef)

				declare void @__libc_free(ptr noundef)

				declare ptr @__libc_malloc(i64 noundef)

				declare ptr @__libc_memalign(i64 noundef, i64 noundef)
				No newline at end of file

llvm/test/Transforms/HipStdPar/unsupported-asm.ll

This file was added.

				; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
				; RUN: %s 2>&1 | FileCheck %s

				; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the ASM block:
				; CHECK-NEXT: {{.*}}Invalid ASM block{{.*}}
				; Kernel (amdgpu_kernel calling convention) whose only call targets
				; @__ASM__hipstdpar_unsupported, passing the text "Invalid ASM block" as an
				; i8 array. The check lines above expect the tool to report an error that
				; names this function and echoes that text; the RUN line uses `not`, so a
				; failing exit status is expected.
				define amdgpu_kernel void @foo() {
				entry:
				call void @__ASM__hipstdpar_unsupported([18 x i8] c"Invalid ASM block\00")
				ret void
				}

				declare void @__ASM__hipstdpar_unsupported([18 x i8])
				No newline at end of file

llvm/test/Transforms/HipStdPar/unsupported-builtins.ll

This file was added.

				; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
				; RUN: %s 2>&1 | FileCheck %s

				; CHECK: error: {{.*}} in function foo void (): Accelerator does not support the __builtin_ia32_pause function
				; Kernel (amdgpu_kernel calling convention) whose only call targets
				; @__builtin_ia32_pause__hipstdpar_unsupported. The check line above expects
				; an error that names this function and reports __builtin_ia32_pause (the
				; callee name without its __hipstdpar_unsupported suffix) as unsupported.
				define amdgpu_kernel void @foo() {
				entry:
				call void @__builtin_ia32_pause__hipstdpar_unsupported()
				ret void
				}

				declare void @__builtin_ia32_pause__hipstdpar_unsupported()
				No newline at end of file

llvm/test/Transforms/HipStdPar/unsupported-thread-local-direct-use.ll

This file was added.

				; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
				; RUN: %s 2>&1 | FileCheck %s

				@tls = hidden thread_local addrspace(1) global i32 0, align 4

				; CHECK: error: {{.*}} in function direct_use void (): Accelerator does not support the thread_local variable tls
				; Kernel that reads the thread_local global @tls directly: it passes @tls to
				; llvm.threadlocal.address in addrspace(1) and loads an i32 from the result.
				; The check line above expects an error that names this function and the
				; variable tls.
				define amdgpu_kernel void @direct_use() {
				entry:
				%0 = call align 4 ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @tls)
				%1 = load i32, ptr addrspace(1) %0, align 4
				ret void
				}

				declare nonnull ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) nonnull)

llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll

This file was added.

				; RUN: not opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
				; RUN: %s 2>&1 | FileCheck %s

				@tls = hidden thread_local addrspace(1) global i32 0, align 4

				; CHECK: error: {{.*}} in function indirect_use void (): Accelerator does not support the thread_local variable tls
				; Kernel that reaches the thread_local global @tls through a constant
				; expression: @tls is addrspacecast from addrspace(1) to the default address
				; space before being passed to llvm.threadlocal.address, and an i32 is loaded
				; from the result. The check line above expects an error that names this
				; function and the variable tls, i.e. the use must be detected through the
				; cast.
				define amdgpu_kernel void @indirect_use() {
				entry:
				%0 = call align 4 ptr @llvm.threadlocal.address.p0(ptr addrspacecast (ptr addrspace(1) @tls to ptr))
				%1 = load i32, ptr %0, align 4
				ret void
				}

				declare nonnull ptr @llvm.threadlocal.address.p0(ptr nonnull)

This is an archive of the discontinued LLVM Phabricator instance.

[HIP][LLVM][Opt][AMDGPU][RFC] Add LLVM support for C++ Parallel Algorithm Offload
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 557687

llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h

llvm/lib/Passes/CMakeLists.txt

llvm/lib/Passes/PassBuilder.cpp

llvm/lib/Passes/PassBuilderPipelines.cpp

llvm/lib/Passes/PassRegistry.def

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

llvm/lib/Target/AMDGPU/CMakeLists.txt

llvm/lib/Transforms/CMakeLists.txt

llvm/lib/Transforms/HipStdPar/CMakeLists.txt

llvm/lib/Transforms/HipStdPar/HipStdPar.cpp

llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll

llvm/test/Transforms/HipStdPar/allocation-interposition.ll

llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll

llvm/test/Transforms/HipStdPar/unsupported-asm.ll

llvm/test/Transforms/HipStdPar/unsupported-builtins.ll

llvm/test/Transforms/HipStdPar/unsupported-thread-local-direct-use.ll

llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll

This is an archive of the discontinued LLVM Phabricator instance.

[HIP][LLVM][Opt][AMDGPU][RFC] Add LLVM support for C++ Parallel Algorithm Offload
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 557687

llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h

llvm/lib/Passes/CMakeLists.txt

llvm/lib/Passes/PassBuilder.cpp

llvm/lib/Passes/PassBuilderPipelines.cpp

llvm/lib/Passes/PassRegistry.def

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

llvm/lib/Target/AMDGPU/CMakeLists.txt

llvm/lib/Transforms/CMakeLists.txt

llvm/lib/Transforms/HipStdPar/CMakeLists.txt

llvm/lib/Transforms/HipStdPar/HipStdPar.cpp

llvm/test/Transforms/HipStdPar/accelerator-code-selection.ll

llvm/test/Transforms/HipStdPar/allocation-interposition.ll

llvm/test/Transforms/HipStdPar/allocation-no-interposition.ll

llvm/test/Transforms/HipStdPar/unsupported-asm.ll

llvm/test/Transforms/HipStdPar/unsupported-builtins.ll

llvm/test/Transforms/HipStdPar/unsupported-thread-local-direct-use.ll

llvm/test/Transforms/HipStdPar/unsupported-thread-local-indirect-use.ll

[HIP][LLVM][Opt][AMDGPU][RFC] Add LLVM support for C++ Parallel Algorithm Offload
ClosedPublic